VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@28800

Last change on this file: revision 28800, checked in by vboxsync, 15 years ago

Automated rebranding to Oracle copyright/license strings via filemuncher

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 198.2 KB
1/* $Id: PGMAllPool.cpp 28800 2010-04-27 08:22:32Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/pgm.h>
24#include <VBox/mm.h>
25#include <VBox/em.h>
26#include <VBox/cpum.h>
27#ifdef IN_RC
28# include <VBox/patm.h>
29#endif
30#include "../PGMInternal.h"
31#include <VBox/vm.h>
32#include "../PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/hwacc_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/string.h>
40
41
42/*******************************************************************************
43* Internal Functions *
44*******************************************************************************/
45RT_C_DECLS_BEGIN
46static void pgmPoolFlushAllInt(PPGMPOOL pPool);
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
51static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#ifndef IN_RING3
53DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
54#endif
55#ifdef LOG_ENABLED
56static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
57#endif
58#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
59static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT);
60#endif
61
62int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
63PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
64void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
65void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
66
67RT_C_DECLS_END
68
69
70/**
71 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
72 *
73 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
74 * @param enmKind The page kind.
75 */
76DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
77{
78 switch (enmKind)
79 {
80 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
83 return true;
84 default:
85 return false;
86 }
87}
88
89/** @def PGMPOOL_PAGE_2_LOCKED_PTR
90 * Maps a pool page into the current context and locks it (RC only).
91 *
92 * @returns VBox status code.
93 * @param pVM The VM handle.
94 * @param pPage The pool page.
95 *
96 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
97 * small page window employed by that function. Be careful.
98 * @remark There is no need to assert on the result.
99 */
100#if defined(IN_RC)
101DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
102{
103 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
104
105 /* Make sure the dynamic mapping will not be reused. */
106 if (pv)
107 PGMDynLockHCPage(pVM, (uint8_t *)pv);
108
109 return pv;
110}
111#else
112# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
113#endif
114
115/** @def PGMPOOL_UNLOCK_PTR
116 * Unlocks a previously locked dynamic mapping (RC only).
117 *
118 * @returns VBox status code.
119 * @param pVM The VM handle.
120 * @param pPage The pool page.
121 *
122 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
123 * small page window employed by that function. Be careful.
124 * @remark There is no need to assert on the result.
125 */
126#if defined(IN_RC)
127DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
128{
129 if (pvPage)
130 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
131}
132#else
133# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
134#endif
135
136
137/**
138 * Flushes a chain of pages sharing the same access monitor.
139 *
140 * @returns VBox status code suitable for scheduling.
141 * @param pPool The pool.
142 * @param pPage A page in the chain.
143 */
144int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
145{
146 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
147
148 /*
149 * Find the list head.
150 */
151 uint16_t idx = pPage->idx;
152 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
153 {
154 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
155 {
156 idx = pPage->iMonitoredPrev;
157 Assert(idx != pPage->idx);
158 pPage = &pPool->aPages[idx];
159 }
160 }
161
162 /*
163 * Iterate the list flushing each shadow page.
164 */
165 int rc = VINF_SUCCESS;
166 for (;;)
167 {
168 idx = pPage->iMonitoredNext;
169 Assert(idx != pPage->idx);
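        /* Indices below PGMPOOL_IDX_FIRST denote the fixed special root pages; only
           regular pool pages are flushed here. */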
170 if (pPage->idx >= PGMPOOL_IDX_FIRST)
171 {
172 int rc2 = pgmPoolFlushPage(pPool, pPage);
173 AssertRC(rc2);
174 }
175 /* next */
176 if (idx == NIL_PGMPOOL_IDX)
177 break;
178 pPage = &pPool->aPages[idx];
179 }
180 return rc;
181}
182
183
184/**
185 * Wrapper for getting the current context pointer to the entry being modified.
186 *
187 * @returns VBox status code suitable for scheduling.
188 * @param pVM VM Handle.
189 * @param pvDst Destination address
190 * @param pvSrc Source guest virtual address.
191 * @param GCPhysSrc The source guest physical address.
192 * @param cb Size of data to read
193 */
194DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
195{
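    /* Note: the source address is masked down to an entry-aligned boundary (cb is the
       size of a guest entry), so the whole entry is read even when the faulting write
       landed in the middle of it. */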
196#if defined(IN_RING3)
197 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
198 return VINF_SUCCESS;
199#else
200 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
201 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
202#endif
203}
204
205/**
206 * Process shadow entries before they are changed by the guest.
207 *
208 * For PT entries we will clear them. For PD entries, we'll simply check
209 * for mapping conflicts and set the SyncCR3 FF if found.
210 *
211 * @param pVCpu VMCPU handle
212 * @param pPool The pool.
213 * @param pPage The head page.
214 * @param GCPhysFault The guest physical fault address.
215 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
216 * In R3 this is the host context 'fault' address.
217 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
218 */
219void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
220{
221 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
222 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
223 PVM pVM = pPool->CTX_SUFF(pVM);
224
225 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
226
227 for (;;)
228 {
229 union
230 {
231 void *pv;
232 PX86PT pPT;
233 PX86PTPAE pPTPae;
234 PX86PD pPD;
235 PX86PDPAE pPDPae;
236 PX86PDPT pPDPT;
237 PX86PML4 pPML4;
238 } uShw;
239
240 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
241
242 uShw.pv = NULL;
243 switch (pPage->enmKind)
244 {
245 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
246 {
247 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
248 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
249 const unsigned iShw = off / sizeof(X86PTE);
250 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
251 if (uShw.pPT->a[iShw].n.u1Present)
252 {
253 X86PTE GstPte;
254
255 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
256 AssertRC(rc);
257 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
258 pgmPoolTracDerefGCPhysHint(pPool, pPage,
259 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
260 GstPte.u & X86_PTE_PG_MASK,
261 iShw);
262 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
263 }
264 break;
265 }
266
267 /* page/2 sized */
268 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
269 {
270 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
271 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
272 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
273 {
274 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
275 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
276 if (uShw.pPTPae->a[iShw].n.u1Present)
277 {
278 X86PTE GstPte;
279 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
280 AssertRC(rc);
281
282 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
283 pgmPoolTracDerefGCPhysHint(pPool, pPage,
284 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
285 GstPte.u & X86_PTE_PG_MASK,
286 iShw);
287 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
288 }
289 }
290 break;
291 }
292
293 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
294 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
295 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
296 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
297 {
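            /* A 32-bit guest PD has 1024 entries covering 4GB and is shadowed by four PAE PDs
               of 512 entries (1GB) each: iGst / 256 picks the shadow PD, and since a 32-bit PDE
               spans 4MB while a PAE PDE spans 2MB, each guest entry maps to two shadow entries. */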
298 unsigned iGst = off / sizeof(X86PDE);
299 unsigned iShwPdpt = iGst / 256;
300 unsigned iShw = (iGst % 256) * 2;
301 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
302
303 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
304 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
305 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
306 {
307 for (unsigned i = 0; i < 2; i++)
308 {
309# ifndef IN_RING0
310 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
311 {
312 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
313 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
314 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
315 break;
316 }
317 else
318# endif /* !IN_RING0 */
319 if (uShw.pPDPae->a[iShw+i].n.u1Present)
320 {
321 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
322 pgmPoolFree(pVM,
323 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
324 pPage->idx,
325 iShw + i);
326 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
327 }
328
329 /* paranoia / a bit assumptive. */
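                    /* An unaligned write larger than a single guest entry may also touch the
                       following entry, so check the corresponding second shadow entry too. */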
330 if ( (off & 3)
331 && (off & 3) + cbWrite > 4)
332 {
333 const unsigned iShw2 = iShw + 2 + i;
334 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
335 {
336# ifndef IN_RING0
337 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
338 {
339 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
340 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
341 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
342 break;
343 }
344 else
345# endif /* !IN_RING0 */
346 if (uShw.pPDPae->a[iShw2].n.u1Present)
347 {
348 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
349 pgmPoolFree(pVM,
350 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
351 pPage->idx,
352 iShw2);
353 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
354 }
355 }
356 }
357 }
358 }
359 break;
360 }
361
362 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
363 {
364 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
365 const unsigned iShw = off / sizeof(X86PTEPAE);
366 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
367 if (uShw.pPTPae->a[iShw].n.u1Present)
368 {
369 X86PTEPAE GstPte;
370 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
371 AssertRC(rc);
372
373 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
374 pgmPoolTracDerefGCPhysHint(pPool, pPage,
375 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
376 GstPte.u & X86_PTE_PAE_PG_MASK,
377 iShw);
378 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
379 }
380
381 /* paranoia / a bit assumptive. */
382 if ( (off & 7)
383 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
384 {
385 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
386 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
387
388 if (uShw.pPTPae->a[iShw2].n.u1Present)
389 {
390 X86PTEPAE GstPte;
391# ifdef IN_RING3
392 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
393# else
394 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
395# endif
396 AssertRC(rc);
397 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
398 pgmPoolTracDerefGCPhysHint(pPool, pPage,
399 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
400 GstPte.u & X86_PTE_PAE_PG_MASK,
401 iShw2);
402 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u ,0);
403 }
404 }
405 break;
406 }
407
408 case PGMPOOLKIND_32BIT_PD:
409 {
410 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
411 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
412
413 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
414 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
415# ifndef IN_RING0
416 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
417 {
418 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
419 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
420 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
421 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
422 break;
423 }
424# endif /* !IN_RING0 */
425# ifndef IN_RING0
426 else
427# endif /* !IN_RING0 */
428 {
429 if (uShw.pPD->a[iShw].n.u1Present)
430 {
431 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
432 pgmPoolFree(pVM,
433 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
434 pPage->idx,
435 iShw);
436 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
437 }
438 }
439 /* paranoia / a bit assumptive. */
440 if ( (off & 3)
441 && (off & 3) + cbWrite > sizeof(X86PTE))
442 {
443 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
444 if ( iShw2 != iShw
445 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
446 {
447# ifndef IN_RING0
448 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
449 {
450 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
451 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
452 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
453 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
454 break;
455 }
456# endif /* !IN_RING0 */
457# ifndef IN_RING0
458 else
459# endif /* !IN_RING0 */
460 {
461 if (uShw.pPD->a[iShw2].n.u1Present)
462 {
463 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
464 pgmPoolFree(pVM,
465 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
466 pPage->idx,
467 iShw2);
468 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
469 }
470 }
471 }
472 }
473#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
474 if ( uShw.pPD->a[iShw].n.u1Present
475 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
476 {
477 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
478# ifdef IN_RC /* TLB load - we're pushing things a bit... */
479 ASMProbeReadByte(pvAddress);
480# endif
481 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
482 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
483 }
484#endif
485 break;
486 }
487
488 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
489 {
490 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
491 const unsigned iShw = off / sizeof(X86PDEPAE);
492 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
493#ifndef IN_RING0
494 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
495 {
496 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
497 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
498 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
499 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
500 break;
501 }
502#endif /* !IN_RING0 */
503 /*
504 * Causes trouble when the guest uses a PDE to refer to the whole page table level
505 * structure. (Invalidate here; faults later on when it tries to change the page
506 * table entries -> recheck; probably only applies to the RC case.)
507 */
508# ifndef IN_RING0
509 else
510# endif /* !IN_RING0 */
511 {
512 if (uShw.pPDPae->a[iShw].n.u1Present)
513 {
514 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
515 pgmPoolFree(pVM,
516 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
517 pPage->idx,
518 iShw);
519 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
520 }
521 }
522 /* paranoia / a bit assumptive. */
523 if ( (off & 7)
524 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
525 {
526 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
527 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
528
529#ifndef IN_RING0
530 if ( iShw2 != iShw
531 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
532 {
533 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
534 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
535 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
536 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
537 break;
538 }
539#endif /* !IN_RING0 */
540# ifndef IN_RING0
541 else
542# endif /* !IN_RING0 */
543 if (uShw.pPDPae->a[iShw2].n.u1Present)
544 {
545 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
546 pgmPoolFree(pVM,
547 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
548 pPage->idx,
549 iShw2);
550 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
551 }
552 }
553 break;
554 }
555
556 case PGMPOOLKIND_PAE_PDPT:
557 {
558 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
559 /*
560 * Hopefully this doesn't happen very often:
561 * - touching unused parts of the page
562 * - messing with the bits of pd pointers without changing the physical address
563 */
564 /* PDPT roots are not page aligned; 32 byte only! */
565 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
566
567 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
568 const unsigned iShw = offPdpt / sizeof(X86PDPE);
569 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
570 {
571# ifndef IN_RING0
572 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
573 {
574 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
575 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
576 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
577 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
578 break;
579 }
580# endif /* !IN_RING0 */
581# ifndef IN_RING0
582 else
583# endif /* !IN_RING0 */
584 if (uShw.pPDPT->a[iShw].n.u1Present)
585 {
586 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
587 pgmPoolFree(pVM,
588 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
589 pPage->idx,
590 iShw);
591 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
592 }
593
594 /* paranoia / a bit assumptive. */
595 if ( (offPdpt & 7)
596 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
597 {
598 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
599 if ( iShw2 != iShw
600 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
601 {
602# ifndef IN_RING0
603 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
604 {
605 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
606 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
607 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
608 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
609 break;
610 }
611# endif /* !IN_RING0 */
612# ifndef IN_RING0
613 else
614# endif /* !IN_RING0 */
615 if (uShw.pPDPT->a[iShw2].n.u1Present)
616 {
617 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
618 pgmPoolFree(pVM,
619 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
620 pPage->idx,
621 iShw2);
622 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
623 }
624 }
625 }
626 }
627 break;
628 }
629
630#ifndef IN_RC
631 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
632 {
633 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
634 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
635 const unsigned iShw = off / sizeof(X86PDEPAE);
636 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
637 if (uShw.pPDPae->a[iShw].n.u1Present)
638 {
639 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
640 pgmPoolFree(pVM,
641 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
642 pPage->idx,
643 iShw);
644 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
645 }
646 /* paranoia / a bit assumptive. */
647 if ( (off & 7)
648 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
649 {
650 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
651 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
652
653 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
654 if (uShw.pPDPae->a[iShw2].n.u1Present)
655 {
656 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
657 pgmPoolFree(pVM,
658 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
659 pPage->idx,
660 iShw2);
661 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
662 }
663 }
664 break;
665 }
666
667 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
668 {
669 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
670 /*
671 * Hopefully this doesn't happen very often:
672 * - messing with the bits of pd pointers without changing the physical address
673 */
674 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
675 const unsigned iShw = off / sizeof(X86PDPE);
676 if (uShw.pPDPT->a[iShw].n.u1Present)
677 {
678 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
679 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
680 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
681 }
682 /* paranoia / a bit assumptive. */
683 if ( (off & 7)
684 && (off & 7) + cbWrite > sizeof(X86PDPE))
685 {
686 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
687 if (uShw.pPDPT->a[iShw2].n.u1Present)
688 {
689 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
690 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
691 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
692 }
693 }
694 break;
695 }
696
697 case PGMPOOLKIND_64BIT_PML4:
698 {
699 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
700 /*
701 * Hopefully this doesn't happen very often:
702 * - messing with the bits of pd pointers without changing the physical address
703 */
704 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
705 const unsigned iShw = off / sizeof(X86PDPE);
706 if (uShw.pPML4->a[iShw].n.u1Present)
707 {
708 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
709 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
710 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
711 }
712 /* paranoia / a bit assumptive. */
713 if ( (off & 7)
714 && (off & 7) + cbWrite > sizeof(X86PDPE))
715 {
716 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
717 if (uShw.pPML4->a[iShw2].n.u1Present)
718 {
719 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
720 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
721 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
722 }
723 }
724 break;
725 }
726#endif /* !IN_RC */
727
728 default:
729 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
730 }
731 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
732
733 /* next */
734 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
735 return;
736 pPage = &pPool->aPages[pPage->iMonitoredNext];
737 }
738}
739
740# ifndef IN_RING3
741/**
742 * Checks if an access could be a fork operation in progress.
743 *
744 * Meaning that the guest is setting up the parent process for Copy-On-Write.
745 *
746 * @returns true if it's likely that we're forking, otherwise false.
747 * @param pPool The pool.
748 * @param pDis The disassembled instruction.
749 * @param offFault The access offset.
750 */
751DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
752{
753 /*
754 * i386 Linux uses btr to clear X86_PTE_RW.
755 * The functions involved are (2.6.16 source inspection):
756 * clear_bit
757 * ptep_set_wrprotect
758 * copy_one_pte
759 * copy_pte_range
760 * copy_pmd_range
761 * copy_pud_range
762 * copy_page_range
763 * dup_mmap
764 * dup_mm
765 * copy_mm
766 * copy_process
767 * do_fork
768 */
769 if ( pDis->pCurInstr->opcode == OP_BTR
770 && !(offFault & 4)
771 /** @todo Validate that the bit index is X86_PTE_RW. */
772 )
773 {
774 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
775 return true;
776 }
777 return false;
778}
779
780
781/**
782 * Determine whether the page is likely to have been reused.
783 *
784 * @returns true if we consider the page as being reused for a different purpose.
785 * @returns false if we consider it to still be a paging page.
786 * @param pVM VM Handle.
787 * @param pVCpu VMCPU Handle.
788 * @param pRegFrame Trap register frame.
789 * @param pDis The disassembly info for the faulting instruction.
790 * @param pvFault The fault address.
791 *
792 * @remark The REP prefix check is left to the caller because of STOSD/W.
793 */
794DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
795{
796#ifndef IN_RC
797 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
798 if ( HWACCMHasPendingIrq(pVM)
799 && (pRegFrame->rsp - pvFault) < 32)
800 {
801 /* Fault caused by stack writes while trying to inject an interrupt event. */
802 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
803 return true;
804 }
805#else
806 NOREF(pVM); NOREF(pvFault);
807#endif
808
809 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
810
811 /* Non-supervisor mode write means it's used for something else. */
812 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
813 return true;
814
815 switch (pDis->pCurInstr->opcode)
816 {
817 /* call implies the actual push of the return address faulted */
818 case OP_CALL:
819 Log4(("pgmPoolMonitorIsReused: CALL\n"));
820 return true;
821 case OP_PUSH:
822 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
823 return true;
824 case OP_PUSHF:
825 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
826 return true;
827 case OP_PUSHA:
828 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
829 return true;
830 case OP_FXSAVE:
831 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
832 return true;
833 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
834 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
835 return true;
836 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
837 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
838 return true;
839 case OP_MOVSWD:
840 case OP_STOSWD:
841 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
842 && pRegFrame->rcx >= 0x40
843 )
844 {
845 Assert(pDis->mode == CPUMODE_64BIT);
846
847 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
848 return true;
849 }
850 return false;
851 }
852 if ( ( (pDis->param1.flags & USE_REG_GEN32)
853 || (pDis->param1.flags & USE_REG_GEN64))
854 && (pDis->param1.base.reg_gen == USE_REG_ESP))
855 {
856 Log4(("pgmPoolMonitorIsReused: ESP\n"));
857 return true;
858 }
859
860 return false;
861}
862
863/**
864 * Flushes the page being accessed.
865 *
866 * @returns VBox status code suitable for scheduling.
867 * @param pVM The VM handle.
868 * @param pVCpu The VMCPU handle.
869 * @param pPool The pool.
870 * @param pPage The pool page (head).
871 * @param pDis The disassembly of the write instruction.
872 * @param pRegFrame The trap register frame.
873 * @param GCPhysFault The fault address as guest physical address.
874 * @param pvFault The fault address.
875 */
876static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
877 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
878{
879 /*
880 * First, do the flushing.
881 */
882 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
883
884 /*
885 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection). Must do this in raw mode (!); XP boot will fail otherwise
886 */
887 uint32_t cbWritten;
888 int rc2 = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten, EMCODETYPE_ALL);
889 if (RT_SUCCESS(rc2))
890 pRegFrame->rip += pDis->opsize;
891 else if (rc2 == VERR_EM_INTERPRETER)
892 {
893#ifdef IN_RC
894 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
895 {
896 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
897 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
898 rc = VINF_SUCCESS;
899 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
900 }
901 else
902#endif
903 {
904 rc = VINF_EM_RAW_EMULATE_INSTR;
905 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
906 }
907 }
908 else
909 rc = rc2;
910
911 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
912 return rc;
913}
914
915/**
916 * Handles the STOSD write accesses.
917 *
918 * @returns VBox status code suitable for scheduling.
919 * @param pVM The VM handle.
920 * @param pPool The pool.
921 * @param pPage The pool page (head).
922 * @param pDis The disassembly of the write instruction.
923 * @param pRegFrame The trap register frame.
924 * @param GCPhysFault The fault address as guest physical address.
925 * @param pvFault The fault address.
926 */
927DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
928 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
929{
930 unsigned uIncrement = pDis->param1.size;
931
932 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
933 Assert(pRegFrame->rcx <= 0x20);
934
935#ifdef VBOX_STRICT
936 if (pDis->opmode == CPUMODE_32BIT)
937 Assert(uIncrement == 4);
938 else
939 Assert(uIncrement == 8);
940#endif
941
942 Log3(("pgmPoolAccessHandlerSTOSD\n"));
943
944 /*
945 * Increment the modification counter and insert it into the list
946 * of modified pages the first time.
947 */
948 if (!pPage->cModifications++)
949 pgmPoolMonitorModifiedInsert(pPool, pPage);
950
951 /*
952 * Execute REP STOSD.
953 *
954 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
955 * write situation, meaning that it's safe to write here.
956 */
957 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
958 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
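    /* For each element: let the monitor code clear the affected shadow entry first, then
       perform the guest's store ourselves (directly in RC, via PGMPhysSimpleWriteGCPhys
       otherwise) and update rdi/rcx just like REP STOS would. */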
959 while (pRegFrame->rcx)
960 {
961#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
962 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
963 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
964 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
965#else
966 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
967#endif
968#ifdef IN_RC
969 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
970#else
971 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
972#endif
973 pu32 += uIncrement;
974 GCPhysFault += uIncrement;
975 pRegFrame->rdi += uIncrement;
976 pRegFrame->rcx--;
977 }
978 pRegFrame->rip += pDis->opsize;
979
980 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
981 return VINF_SUCCESS;
982}
983
984
985/**
986 * Handles the simple write accesses.
987 *
988 * @returns VBox status code suitable for scheduling.
989 * @param pVM The VM handle.
990 * @param pVCpu The VMCPU handle.
991 * @param pPool The pool.
992 * @param pPage The pool page (head).
993 * @param pDis The disassembly of the write instruction.
994 * @param pRegFrame The trap register frame.
995 * @param GCPhysFault The fault address as guest physical address.
996 * @param pvFault The fault address.
997 * @param pfReused Reused state (out)
998 */
999DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1000 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1001{
1002 Log3(("pgmPoolAccessHandlerSimple\n"));
1003 /*
1004 * Increment the modification counter and insert it into the list
1005 * of modified pages the first time.
1006 */
1007 if (!pPage->cModifications++)
1008 pgmPoolMonitorModifiedInsert(pPool, pPage);
1009
1010 /*
1011 * Clear the affected entries in all pages of the monitored chain. ASSUMES that pvFault is readable.
1012 */
1013#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1014 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1015 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1016 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1017#else
1018 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1019#endif
1020
1021 /*
1022 * Interpret the instruction.
1023 */
1024 uint32_t cb;
1025 int rc = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb, EMCODETYPE_ALL);
1026 if (RT_SUCCESS(rc))
1027 pRegFrame->rip += pDis->opsize;
1028 else if (rc == VERR_EM_INTERPRETER)
1029 {
1030 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1031 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1032 rc = VINF_EM_RAW_EMULATE_INSTR;
1033 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1034 }
1035
1036#if 0 /* experimental code */
1037 if (rc == VINF_SUCCESS)
1038 {
1039 switch (pPage->enmKind)
1040 {
1041 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1042 {
1043 X86PTEPAE GstPte;
1044 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1045 AssertRC(rc);
1046
1047 /* Check the new value written by the guest. If present and with a bogus physical address, then
1048 * it's fairly safe to assume the guest is reusing the PT.
1049 */
1050 if (GstPte.n.u1Present)
1051 {
1052 RTHCPHYS HCPhys = -1;
1053 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1054 if (rc != VINF_SUCCESS)
1055 {
1056 *pfReused = true;
1057 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1058 }
1059 }
1060 break;
1061 }
1062 }
1063 }
1064#endif
1065
1066 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1067 return rc;
1068}
1069
1070/**
1071 * \#PF Handler callback for PT write accesses.
1072 *
1073 * @returns VBox status code (appropriate for GC return).
1074 * @param pVM VM Handle.
1075 * @param uErrorCode CPU Error code.
1076 * @param pRegFrame Trap register frame.
1077 * NULL on DMA and other non CPU access.
1078 * @param pvFault The fault address (cr2).
1079 * @param GCPhysFault The GC physical address corresponding to pvFault.
1080 * @param pvUser User argument.
1081 */
1082DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1083{
1084 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1085 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1086 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1087 PVMCPU pVCpu = VMMGetCpu(pVM);
1088 unsigned cMaxModifications;
1089 bool fForcedFlush = false;
1090
1091 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1092
1093 pgmLock(pVM);
1094 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1095 {
1096 /* Pool page changed while we were waiting for the lock; ignore. */
1097 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1098 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1099 pgmUnlock(pVM);
1100 return VINF_SUCCESS;
1101 }
1102#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1103 if (pPage->fDirty)
1104 {
1105 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1106 pgmUnlock(pVM);
1107 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1108 }
1109#endif
1110
1111#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1112 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1113 {
1114 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1115 void *pvGst;
1116 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1117 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1118 }
1119#endif
1120
1121 /*
1122 * Disassemble the faulting instruction.
1123 */
1124 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1125 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1126 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1127 {
1128 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1129 pgmUnlock(pVM);
1130 return rc;
1131 }
1132
1133 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1134
1135 /*
1136 * We should ALWAYS have the list head as user parameter. This
1137 * is because we use that page to record the changes.
1138 */
1139 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1140
1141#ifdef IN_RING0
1142 /* Maximum nr of modifications depends on the page type. */
1143 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1144 cMaxModifications = 4;
1145 else
1146 cMaxModifications = 24;
1147#else
1148 cMaxModifications = 48;
1149#endif
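    /* These thresholds trade interpretation overhead against flushing; ring-0 uses a much
       lower limit for PAE page tables, presumably because the dirty page optimization below
       can temporarily lift their write protection instead of emulating every write. */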
1150
1151 /*
1152 * Incremental page table updates should weigh more than random ones.
1153 * (Only applies when started from offset 0)
1154 */
1155 pVCpu->pgm.s.cPoolAccessHandler++;
1156 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1157 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1158 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1159 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1160 {
1161 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1162 Assert(pPage->cModifications < 32000);
1163 pPage->cModifications = pPage->cModifications * 2;
1164 pPage->pvLastAccessHandlerFault = pvFault;
1165 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1166 if (pPage->cModifications >= cMaxModifications)
1167 {
1168 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1169 fForcedFlush = true;
1170 }
1171 }
1172
1173 if (pPage->cModifications >= cMaxModifications)
1174 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1175
1176 /*
1177 * Check if it's worth dealing with.
1178 */
1179 bool fReused = false;
1180 bool fNotReusedNotForking = false;
1181 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1182 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1183 )
1184 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1185 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1186 {
1187 /*
1188 * Simple instructions, no REP prefix.
1189 */
1190 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1191 {
1192 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1193 if (fReused)
1194 goto flushPage;
1195
1196 /* A mov instruction to change the first page table entry will be remembered so we can detect
1197 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1198 */
1199 if ( rc == VINF_SUCCESS
1200 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1201 && pDis->pCurInstr->opcode == OP_MOV
1202 && (pvFault & PAGE_OFFSET_MASK) == 0)
1203 {
1204 pPage->pvLastAccessHandlerFault = pvFault;
1205 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1206 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1207 /* Make sure we don't kick out a page too quickly. */
1208 if (pPage->cModifications > 8)
1209 pPage->cModifications = 2;
1210 }
1211 else
1212 if (pPage->pvLastAccessHandlerFault == pvFault)
1213 {
1214 /* ignore the 2nd write to this page table entry. */
1215 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1216 }
1217 else
1218 {
1219 pPage->pvLastAccessHandlerFault = 0;
1220 pPage->pvLastAccessHandlerRip = 0;
1221 }
1222
1223 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1224 pgmUnlock(pVM);
1225 return rc;
1226 }
1227
1228 /*
1229 * Windows is frequently doing small memset() operations (netio test 4k+).
1230 * We have to deal with these or we'll kill the cache and performance.
1231 */
1232 if ( pDis->pCurInstr->opcode == OP_STOSWD
1233 && !pRegFrame->eflags.Bits.u1DF
1234 && pDis->opmode == pDis->mode
1235 && pDis->addrmode == pDis->mode)
1236 {
1237 bool fValidStosd = false;
1238
1239 if ( pDis->mode == CPUMODE_32BIT
1240 && pDis->prefix == PREFIX_REP
1241 && pRegFrame->ecx <= 0x20
1242 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1243 && !((uintptr_t)pvFault & 3)
1244 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1245 )
1246 {
1247 fValidStosd = true;
1248 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1249 }
1250 else
1251 if ( pDis->mode == CPUMODE_64BIT
1252 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1253 && pRegFrame->rcx <= 0x20
1254 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1255 && !((uintptr_t)pvFault & 7)
1256 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1257 )
1258 {
1259 fValidStosd = true;
1260 }
1261
1262 if (fValidStosd)
1263 {
1264 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1265 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1266 pgmUnlock(pVM);
1267 return rc;
1268 }
1269 }
1270
1271 /* REP prefix, don't bother. */
1272 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1273 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1274 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1275 fNotReusedNotForking = true;
1276 }
1277
1278#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1279 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1280 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1281 */
1282 if ( pPage->cModifications >= cMaxModifications
1283 && !fForcedFlush
1284 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1285 && ( fNotReusedNotForking
1286 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1287 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1288 )
1289 )
1290 {
1291 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1292 Assert(pPage->fDirty == false);
1293
1294 /* Flush any monitored duplicates as we will disable write protection. */
1295 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1296 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1297 {
1298 PPGMPOOLPAGE pPageHead = pPage;
1299
1300 /* Find the monitor head. */
1301 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1302 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1303
1304 while (pPageHead)
1305 {
1306 unsigned idxNext = pPageHead->iMonitoredNext;
1307
1308 if (pPageHead != pPage)
1309 {
1310 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1311 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1312 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1313 AssertRC(rc2);
1314 }
1315
1316 if (idxNext == NIL_PGMPOOL_IDX)
1317 break;
1318
1319 pPageHead = &pPool->aPages[idxNext];
1320 }
1321 }
1322
1323 /* The flushing above might fail for locked pages, so double check. */
1324 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1325 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1326 {
1327 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1328
1329 /* Temporarily allow write access to the page table again. */
1330 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1331 if (rc == VINF_SUCCESS)
1332 {
1333 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1334 AssertMsg(rc == VINF_SUCCESS
1335 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1336 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1337 || rc == VERR_PAGE_NOT_PRESENT,
1338 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1339
1340 pPage->pvDirtyFault = pvFault;
1341
1342 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1343 pgmUnlock(pVM);
1344 return rc;
1345 }
1346 }
1347 }
1348#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1349
1350 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1351flushPage:
1352 /*
1353 * Not worth it, so flush it.
1354 *
1355 * If we considered it to be reused, don't go back to ring-3
1356 * to emulate failed instructions since we usually cannot
1357 * interpret them. This may be a bit risky, in which case
1358 * the reuse detection must be fixed.
1359 */
1360 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1361 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1362 && fReused)
1363 {
1364 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1365 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1366 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1367 }
1368 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1369 pgmUnlock(pVM);
1370 return rc;
1371}
1372
1373# endif /* !IN_RING3 */
1374
1375# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1376
1377# ifdef VBOX_STRICT
1378/**
1379 * Check references to guest physical memory in a PAE / PAE page table.
1380 *
1381 * @param pPool The pool.
1382 * @param pPage The page.
1383 * @param pShwPT The shadow page table (mapping of the page).
1384 * @param pGstPT The guest page table.
1385 */
1386static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1387{
1388 unsigned cErrors = 0;
1389 int LastRc = -1; /* initialized to shut up gcc */
1390 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1391 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1392
1393#ifdef VBOX_STRICT
1394 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1395 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1396#endif
1397 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1398 {
1399 if (pShwPT->a[i].n.u1Present)
1400 {
1401 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1402 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1403 if ( rc != VINF_SUCCESS
1404 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1405 {
1406 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1407 LastPTE = i;
1408 LastRc = rc;
1409 LastHCPhys = HCPhys;
1410 cErrors++;
1411
1412 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1413 rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1414 AssertRC(rc);
1415
1416 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1417 {
1418 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1419
1420 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1421 {
1422 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1423
1424 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1425 {
1426 if ( pShwPT2->a[j].n.u1Present
1427 && pShwPT2->a[j].n.u1Write
1428 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1429 {
1430 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1431 }
1432 }
1433 }
1434 }
1435 }
1436 }
1437 }
1438 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, pShwPT->a[LastPTE].u, LastHCPhys));
1439}
1440# endif /* VBOX_STRICT */
1441
1442/**
1443 * Clear references to guest physical memory in a PAE / PAE page table.
1444 *
1445 * @returns nr of changed PTEs
1446 * @param pPool The pool.
1447 * @param pPage The page.
1448 * @param pShwPT The shadow page table (mapping of the page).
1449 * @param pGstPT The guest page table.
1450 * @param pOldGstPT The old cached guest page table.
1451 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1452 * @param pfFlush Flush reused page table (out)
1453 */
1454DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1455{
1456 unsigned cChanged = 0;
1457
1458#ifdef VBOX_STRICT
1459 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1460 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1461#endif
1462 *pfFlush = false;
1463
1464 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1465 {
1466 /* Check the new value written by the guest. If present and with a bogus physical address, then
1467 * it's fairly safe to assume the guest is reusing the PT.
1468 */
1469 if ( fAllowRemoval
1470 && pGstPT->a[i].n.u1Present)
1471 {
1472 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1473 {
1474 *pfFlush = true;
1475 return ++cChanged;
1476 }
1477 }
1478 if (pShwPT->a[i].n.u1Present)
1479 {
1480 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1481 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1482 {
1483#ifdef VBOX_STRICT
1484 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1485 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1486 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1487#endif
1488 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1489 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1490 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1491 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
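                /* Identical attributes are fine; the shadow entry is also allowed to be more
                   write restricted than the guest one (fHostRW <= fGuestRW), e.g. when write
                   access is being tracked. */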
1492
1493 if ( uHostAttr == uGuestAttr
1494 && fHostRW <= fGuestRW)
1495 continue;
1496 }
1497 cChanged++;
1498 /* Something was changed, so flush it. */
1499 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1500 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1501 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1502 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1503 }
1504 }
1505 return cChanged;
1506}
1507
1508
1509/**
1510 * Flush a dirty page
1511 *
1512 * @param pVM VM Handle.
1513 * @param pPool The pool.
1514 * @param idxSlot Dirty array slot index
1515 * @param fAllowRemoval Allow a reused page table to be removed
1516 */
1517static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1518{
1519 PPGMPOOLPAGE pPage;
1520 unsigned idxPage;
1521
1522 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1523 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1524 return;
1525
1526 idxPage = pPool->aIdxDirtyPages[idxSlot];
1527 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1528 pPage = &pPool->aPages[idxPage];
1529 Assert(pPage->idx == idxPage);
1530 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1531
1532 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1533 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1534
1535 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1536 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1537 Assert(rc == VINF_SUCCESS);
1538 pPage->fDirty = false;
1539
1540#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1541 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(VMMGetCpu(pVM));
1542#endif
1543
1544#ifdef VBOX_STRICT
1545 uint64_t fFlags = 0;
1546 RTHCPHYS HCPhys;
1547 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1548 AssertMsg( ( rc == VINF_SUCCESS
1549 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1550 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1551 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1552 || rc == VERR_PAGE_NOT_PRESENT,
1553 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1554#endif
1555
1556 /* Flush those PTEs that have changed. */
1557 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1558 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1559 void *pvGst;
1560 bool fFlush;
1561 rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1562 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0], fAllowRemoval, &fFlush);
1563 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1564 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1565
1566 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1567 Assert(pPage->cModifications);
1568 if (cChanges < 4)
1569 pPage->cModifications = 1; /* must use > 0 here */
1570 else
1571 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1572
1573 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
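    /* If the dirty page array was full, the slot we've just freed becomes the next free slot. */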
1574 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1575 pPool->idxFreeDirtyPage = idxSlot;
1576
1577 pPool->cDirtyPages--;
1578 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1579 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1580 if (fFlush)
1581 {
1582 Assert(fAllowRemoval);
1583 Log(("Flush reused page table!\n"));
1584 pgmPoolFlushPage(pPool, pPage);
1585 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1586 }
1587 else
1588 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1589
1590#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1591 PGMDynMapPopAutoSubset(VMMGetCpu(pVM), iPrevSubset);
1592#endif
1593}
1594
1595# ifndef IN_RING3
1596/**
1597 * Adds a new dirty page.
1598 *
1599 * @param pVM VM Handle.
1600 * @param pPool The pool.
1601 * @param pPage The page.
1602 */
1603void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1604{
1605 unsigned idxFree;
1606
1607 Assert(PGMIsLocked(pVM));
1608 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1609 Assert(!pPage->fDirty);
1610
1611 idxFree = pPool->idxFreeDirtyPage;
1612 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1613 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1614
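    /* If the dirty page array is full, flush the entry in this slot to make room. */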
1615 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1616 {
1617 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1618 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1619 }
1620 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1621 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1622
1623 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1624
1625 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1626 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1627 */
1628 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1629 void *pvGst;
1630 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1631 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1632#ifdef VBOX_STRICT
1633 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1634#endif
1635
1636 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1637 pPage->fDirty = true;
1638 pPage->idxDirty = idxFree;
1639 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1640 pPool->cDirtyPages++;
1641
1642 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
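    /* If the next (round robin) slot is still occupied, search the array for a free one. */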
1643 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1644 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1645 {
1646 unsigned i;
1647 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1648 {
1649 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1650 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1651 {
1652 pPool->idxFreeDirtyPage = idxFree;
1653 break;
1654 }
1655 }
1656 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1657 }
1658
1659 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1660 return;
1661}
1662# endif /* !IN_RING3 */
1663
1664/**
1665 * Checks if the specified page is dirty (not write monitored).
1666 *
1667 * @returns true if dirty, false if not.
1668 * @param pVM VM Handle.
1669 * @param GCPhys Guest physical address
1670 */
1671bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1672{
1673 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1674 Assert(PGMIsLocked(pVM));
1675 if (!pPool->cDirtyPages)
1676 return false;
1677
1678 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1679
1680 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1681 {
1682 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1683 {
1684 PPGMPOOLPAGE pPage;
1685 unsigned idxPage = pPool->aIdxDirtyPages[i];
1686
1687 pPage = &pPool->aPages[idxPage];
1688 if (pPage->GCPhys == GCPhys)
1689 return true;
1690 }
1691 }
1692 return false;
1693}
1694
1695/**
1696 * Reset all dirty pages by reinstating page monitoring.
1697 *
1698 * @param pVM VM Handle.
1699 */
1700void pgmPoolResetDirtyPages(PVM pVM)
1701{
1702 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1703 Assert(PGMIsLocked(pVM));
1704 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1705
1706 if (!pPool->cDirtyPages)
1707 return;
1708
1709 Log(("pgmPoolResetDirtyPages\n"));
1710 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1711 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1712
1713 pPool->idxFreeDirtyPage = 0;
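    /* Make sure the free slot hint points at an unused entry. */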
1714 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1715 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1716 {
1717 unsigned i;
1718 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1719 {
1720 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1721 {
1722 pPool->idxFreeDirtyPage = i;
1723 break;
1724 }
1725 }
1726 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1727 }
1728
1729 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1730 return;
1731}
1732
1733/**
1734 * Invalidates the dirty state of the specified page table, flushing it and reinstating write monitoring.
1735 *
1736 * @param pVM VM Handle.
1737 * @param GCPhysPT Physical address of the page table
1738 */
1739void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1740{
1741 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1742 Assert(PGMIsLocked(pVM));
1743 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1744 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aIdxDirtyPages);
1745
1746 if (!pPool->cDirtyPages)
1747 return;
1748
1749 GCPhysPT = GCPhysPT & ~(RTGCPHYS)(PAGE_SIZE - 1);
1750
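    /* Find the dirty slot that tracks this page table, if any. */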
1751 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1752 {
1753 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1754 {
1755 unsigned idxPage = pPool->aIdxDirtyPages[i];
1756
1757 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1758 if (pPage->GCPhys == GCPhysPT)
1759 {
1760 idxDirtyPage = i;
1761 break;
1762 }
1763 }
1764 }
1765
1766 if (idxDirtyPage != RT_ELEMENTS(pPool->aIdxDirtyPages))
1767 {
1768 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1769 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1770 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1771 {
1772 unsigned i;
1773 for (i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1774 {
1775 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1776 {
1777 pPool->idxFreeDirtyPage = i;
1778 break;
1779 }
1780 }
1781 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1782 }
1783 }
1784}
1785
1786# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1787
1788/**
1789 * Inserts a page into the GCPhys hash table.
1790 *
1791 * @param pPool The pool.
1792 * @param pPage The page.
1793 */
1794DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1795{
1796 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1797 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1798 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1799 pPage->iNext = pPool->aiHash[iHash];
1800 pPool->aiHash[iHash] = pPage->idx;
1801}
1802
1803
1804/**
1805 * Removes a page from the GCPhys hash table.
1806 *
1807 * @param pPool The pool.
1808 * @param pPage The page.
1809 */
1810DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1811{
1812 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1813 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1814 if (pPool->aiHash[iHash] == pPage->idx)
1815 pPool->aiHash[iHash] = pPage->iNext;
1816 else
1817 {
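        /* Not the head; walk the hash chain and unlink the page. */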
1818 uint16_t iPrev = pPool->aiHash[iHash];
1819 for (;;)
1820 {
1821 const int16_t i = pPool->aPages[iPrev].iNext;
1822 if (i == pPage->idx)
1823 {
1824 pPool->aPages[iPrev].iNext = pPage->iNext;
1825 break;
1826 }
1827 if (i == NIL_PGMPOOL_IDX)
1828 {
1829 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1830 break;
1831 }
1832 iPrev = i;
1833 }
1834 }
1835 pPage->iNext = NIL_PGMPOOL_IDX;
1836}
1837
1838
1839/**
1840 * Frees up one cache page.
1841 *
1842 * @returns VBox status code.
1843 * @retval VINF_SUCCESS on success.
1844 * @param pPool The pool.
1845 * @param iUser The user index.
1846 */
1847static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1848{
1849#ifndef IN_RC
1850 const PVM pVM = pPool->CTX_SUFF(pVM);
1851#endif
1852 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1853 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1854
1855 /*
1856 * Select one page from the tail of the age list.
1857 */
1858 PPGMPOOLPAGE pPage;
1859 for (unsigned iLoop = 0; ; iLoop++)
1860 {
1861 uint16_t iToFree = pPool->iAgeTail;
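        /* Never pick the caller's own page; take the next entry from the tail of the age list instead. */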
1862 if (iToFree == iUser)
1863 iToFree = pPool->aPages[iToFree].iAgePrev;
1864/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1865 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1866 {
1867 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1868 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1869 {
1870 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1871 continue;
1872 iToFree = i;
1873 break;
1874 }
1875 }
1876*/
1877 Assert(iToFree != iUser);
1878 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1879 pPage = &pPool->aPages[iToFree];
1880
1881 /*
1882 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1883 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1884 */
1885 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1886 break;
1887 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1888 pgmPoolCacheUsed(pPool, pPage);
1889 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1890 }
1891
1892 /*
1893 * Found a usable page, flush it and return.
1894 */
1895 int rc = pgmPoolFlushPage(pPool, pPage);
1896 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1897 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
1898 if (rc == VINF_SUCCESS)
1899 PGM_INVL_ALL_VCPU_TLBS(pVM);
1900 return rc;
1901}
1902
1903
1904/**
1905 * Checks if a kind mismatch is really a page being reused
1906 * or if it's just normal remappings.
1907 *
1908 * @returns true if reused and the cached page (enmKind1) should be flushed
1909 * @returns false if not reused.
1910 * @param enmKind1 The kind of the cached page.
1911 * @param enmKind2 The kind of the requested page.
1912 */
1913static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1914{
1915 switch (enmKind1)
1916 {
1917 /*
1918 * Never reuse them. There is no remapping in non-paging mode.
1919 */
1920 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1921 case PGMPOOLKIND_32BIT_PD_PHYS:
1922 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1923 case PGMPOOLKIND_PAE_PD_PHYS:
1924 case PGMPOOLKIND_PAE_PDPT_PHYS:
1925 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1926 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1927 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1928 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1929 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1930 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1931 return false;
1932
1933 /*
1934 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1935 */
1936 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1937 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1938 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1939 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1940 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1941 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1942 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1943 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1944 case PGMPOOLKIND_32BIT_PD:
1945 case PGMPOOLKIND_PAE_PDPT:
1946 switch (enmKind2)
1947 {
1948 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1949 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1950 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1951 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1952 case PGMPOOLKIND_64BIT_PML4:
1953 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1954 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1955 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1956 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1957 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1958 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1959 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1960 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1961 return true;
1962 default:
1963 return false;
1964 }
1965
1966 /*
1967 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1968 */
1969 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1970 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1971 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1972 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1973 case PGMPOOLKIND_64BIT_PML4:
1974 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1975 switch (enmKind2)
1976 {
1977 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1978 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1979 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1980 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1981 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1982 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1983 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1984 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1985 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1986 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1987 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1988 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1989 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1990 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1991 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1992 return true;
1993 default:
1994 return false;
1995 }
1996
1997 /*
1998 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1999 */
2000 case PGMPOOLKIND_ROOT_NESTED:
2001 return false;
2002
2003 default:
2004 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2005 }
2006}
2007
2008
2009/**
2010 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2011 *
2012 * @returns VBox status code.
2013 * @retval VINF_PGM_CACHED_PAGE on success.
2014 * @retval VERR_FILE_NOT_FOUND if not found.
2015 * @param pPool The pool.
2016 * @param GCPhys The GC physical address of the page we're gonna shadow.
2017 * @param enmKind The kind of mapping.
2018 * @param enmAccess Access type for the mapping (only relevant for big pages)
2019 * @param iUser The shadow page pool index of the user table.
2020 * @param iUserTable The index into the user table (shadowed).
2021 * @param ppPage Where to store the pointer to the page.
2022 */
2023static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2024{
2025#ifndef IN_RC
2026 const PVM pVM = pPool->CTX_SUFF(pVM);
2027#endif
2028 /*
2029 * Look up the GCPhys in the hash.
2030 */
2031 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2032 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2033 if (i != NIL_PGMPOOL_IDX)
2034 {
2035 do
2036 {
2037 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2038 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2039 if (pPage->GCPhys == GCPhys)
2040 {
2041 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2042 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2043 {
2044 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2045 * doesn't flush it in case there are no more free use records.
2046 */
2047 pgmPoolCacheUsed(pPool, pPage);
2048
2049 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2050 if (RT_SUCCESS(rc))
2051 {
2052 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2053 *ppPage = pPage;
2054 if (pPage->cModifications)
2055 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2056 STAM_COUNTER_INC(&pPool->StatCacheHits);
2057 return VINF_PGM_CACHED_PAGE;
2058 }
2059 return rc;
2060 }
2061
2062 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2063 {
2064 /*
2065 * The kind is different. In some cases we should now flush the page
2066 * as it has been reused, but in most cases this is normal remapping
2067 * of PDs as PT or big pages using the GCPhys field in a slightly
2068 * different way than the other kinds.
2069 */
2070 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2071 {
2072 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2073 pgmPoolFlushPage(pPool, pPage);
2074 break;
2075 }
2076 }
2077 }
2078
2079 /* next */
2080 i = pPage->iNext;
2081 } while (i != NIL_PGMPOOL_IDX);
2082 }
2083
2084 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2085 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2086 return VERR_FILE_NOT_FOUND;
2087}
2088
2089
2090/**
2091 * Inserts a page into the cache.
2092 *
2093 * @param pPool The pool.
2094 * @param pPage The cached page.
2095 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2096 */
2097static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2098{
2099 /*
2100 * Insert into the GCPhys hash if the page is fit for that.
2101 */
2102 Assert(!pPage->fCached);
2103 if (fCanBeCached)
2104 {
2105 pPage->fCached = true;
2106 pgmPoolHashInsert(pPool, pPage);
2107 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2108 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2109 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2110 }
2111 else
2112 {
2113 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2114 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2115 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2116 }
2117
2118 /*
2119 * Insert at the head of the age list.
2120 */
2121 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2122 pPage->iAgeNext = pPool->iAgeHead;
2123 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2124 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2125 else
2126 pPool->iAgeTail = pPage->idx;
2127 pPool->iAgeHead = pPage->idx;
2128}
2129
2130
2131/**
2132 * Flushes a cached page.
2133 *
2134 * @param pPool The pool.
2135 * @param pPage The cached page.
2136 */
2137static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2138{
2139 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2140
2141 /*
2142 * Remove the page from the hash.
2143 */
2144 if (pPage->fCached)
2145 {
2146 pPage->fCached = false;
2147 pgmPoolHashRemove(pPool, pPage);
2148 }
2149 else
2150 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2151
2152 /*
2153 * Remove it from the age list.
2154 */
2155 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2156 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2157 else
2158 pPool->iAgeTail = pPage->iAgePrev;
2159 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2160 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2161 else
2162 pPool->iAgeHead = pPage->iAgeNext;
2163 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2164 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2165}
2166
2167
2168/**
2169 * Looks for pages sharing the monitor.
2170 *
2171 * @returns Pointer to the head page.
2172 * @returns NULL if not found.
2173 * @param pPool The pool.
2174 * @param pNewPage The page which is going to be monitored.
2175 */
2176static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2177{
2178 /*
2179 * Look up the GCPhys in the hash.
2180 */
2181 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2182 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2183 if (i == NIL_PGMPOOL_IDX)
2184 return NULL;
2185 do
2186 {
2187 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2188 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2189 && pPage != pNewPage)
2190 {
2191 switch (pPage->enmKind)
2192 {
2193 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2194 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2195 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2196 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2197 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2198 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2199 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2200 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2201 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2202 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2203 case PGMPOOLKIND_64BIT_PML4:
2204 case PGMPOOLKIND_32BIT_PD:
2205 case PGMPOOLKIND_PAE_PDPT:
2206 {
2207 /* find the head */
2208 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2209 {
2210 Assert(pPage->iMonitoredPrev != pPage->idx);
2211 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2212 }
2213 return pPage;
2214 }
2215
2216 /* ignore, no monitoring. */
2217 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2218 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2219 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2220 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2221 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2222 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2223 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2224 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2225 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2226 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2227 case PGMPOOLKIND_ROOT_NESTED:
2228 case PGMPOOLKIND_PAE_PD_PHYS:
2229 case PGMPOOLKIND_PAE_PDPT_PHYS:
2230 case PGMPOOLKIND_32BIT_PD_PHYS:
2231 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2232 break;
2233 default:
2234 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2235 }
2236 }
2237
2238 /* next */
2239 i = pPage->iNext;
2240 } while (i != NIL_PGMPOOL_IDX);
2241 return NULL;
2242}
2243
2244
2245/**
2246 * Enables write monitoring of a guest page.
2247 *
2248 * @returns VBox status code.
2249 * @retval VINF_SUCCESS on success.
2250 * @param pPool The pool.
2251 * @param pPage The cached page.
2252 */
2253static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2254{
2255 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2256
2257 /*
2258 * Filter out the relevant kinds.
2259 */
2260 switch (pPage->enmKind)
2261 {
2262 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2263 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2264 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2265 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2266 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2267 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2268 case PGMPOOLKIND_64BIT_PML4:
2269 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2270 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2271 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2272 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2273 case PGMPOOLKIND_32BIT_PD:
2274 case PGMPOOLKIND_PAE_PDPT:
2275 break;
2276
2277 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2278 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2279 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2280 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2281 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2282 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2283 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2284 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2285 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2286 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2287 case PGMPOOLKIND_ROOT_NESTED:
2288 /* Nothing to monitor here. */
2289 return VINF_SUCCESS;
2290
2291 case PGMPOOLKIND_32BIT_PD_PHYS:
2292 case PGMPOOLKIND_PAE_PDPT_PHYS:
2293 case PGMPOOLKIND_PAE_PD_PHYS:
2294 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2295 /* Nothing to monitor here. */
2296 return VINF_SUCCESS;
2297 default:
2298 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2299 }
2300
2301 /*
2302 * Install handler.
2303 */
2304 int rc;
2305 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2306 if (pPageHead)
2307 {
2308 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2309 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2310
2311#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2312 if (pPageHead->fDirty)
2313 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2314#endif
2315
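        /* Link the page into the existing monitored chain; it shares the physical handler already installed for the head. */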
2316 pPage->iMonitoredPrev = pPageHead->idx;
2317 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2318 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2319 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2320 pPageHead->iMonitoredNext = pPage->idx;
2321 rc = VINF_SUCCESS;
2322 }
2323 else
2324 {
2325 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2326 PVM pVM = pPool->CTX_SUFF(pVM);
2327 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2328 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2329 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2330 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2331 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2332 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2333 pPool->pszAccessHandler);
2334 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2335 * the heap size should suffice. */
2336 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2337 PVMCPU pVCpu = VMMGetCpu(pVM);
2338 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2339 }
2340 pPage->fMonitored = true;
2341 return rc;
2342}
2343
2344
2345/**
2346 * Disables write monitoring of a guest page.
2347 *
2348 * @returns VBox status code.
2349 * @retval VINF_SUCCESS on success.
2350 * @param pPool The pool.
2351 * @param pPage The cached page.
2352 */
2353static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2354{
2355 /*
2356 * Filter out the relevant kinds.
2357 */
2358 switch (pPage->enmKind)
2359 {
2360 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2361 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2362 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2363 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2364 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2365 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2366 case PGMPOOLKIND_64BIT_PML4:
2367 case PGMPOOLKIND_32BIT_PD:
2368 case PGMPOOLKIND_PAE_PDPT:
2369 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2370 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2371 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2372 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2373 break;
2374
2375 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2376 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2377 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2378 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2379 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2380 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2381 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2382 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2383 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2384 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2385 case PGMPOOLKIND_ROOT_NESTED:
2386 case PGMPOOLKIND_PAE_PD_PHYS:
2387 case PGMPOOLKIND_PAE_PDPT_PHYS:
2388 case PGMPOOLKIND_32BIT_PD_PHYS:
2389 /* Nothing to monitor here. */
2390 Assert(!pPage->fMonitored);
2391 return VINF_SUCCESS;
2392
2393 default:
2394 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2395 }
2396 Assert(pPage->fMonitored);
2397
2398 /*
2399 * Remove the page from the monitored list or uninstall it if last.
2400 */
2401 const PVM pVM = pPool->CTX_SUFF(pVM);
2402 int rc;
2403 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2404 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2405 {
2406 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2407 {
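            /* We're the head of the chain; hand the physical handler over to the next page in the chain. */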
2408 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2409 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2410 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2411 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2412 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2413 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2414 pPool->pszAccessHandler);
2415 AssertFatalRCSuccess(rc);
2416 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2417 }
2418 else
2419 {
2420 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2421 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2422 {
2423 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2424 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2425 }
2426 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2427 rc = VINF_SUCCESS;
2428 }
2429 }
2430 else
2431 {
2432 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2433 AssertFatalRC(rc);
2434 PVMCPU pVCpu = VMMGetCpu(pVM);
2435 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2436 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2437 }
2438 pPage->fMonitored = false;
2439
2440 /*
2441 * Remove it from the list of modified pages (if in it).
2442 */
2443 pgmPoolMonitorModifiedRemove(pPool, pPage);
2444
2445 return rc;
2446}
2447
2448
2449/**
2450 * Inserts the page into the list of modified pages.
2451 *
2452 * @param pPool The pool.
2453 * @param pPage The page.
2454 */
2455void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2456{
2457 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2458 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2459 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2460 && pPool->iModifiedHead != pPage->idx,
2461 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2462 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2463 pPool->iModifiedHead, pPool->cModifiedPages));
2464
2465 pPage->iModifiedNext = pPool->iModifiedHead;
2466 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2467 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2468 pPool->iModifiedHead = pPage->idx;
2469 pPool->cModifiedPages++;
2470#ifdef VBOX_WITH_STATISTICS
2471 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2472 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2473#endif
2474}
2475
2476
2477/**
2478 * Removes the page from the list of modified pages and resets the
2479 * modification counter.
2480 *
2481 * @param pPool The pool.
2482 * @param pPage The page which is believed to be in the list of modified pages.
2483 */
2484static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2485{
2486 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2487 if (pPool->iModifiedHead == pPage->idx)
2488 {
2489 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2490 pPool->iModifiedHead = pPage->iModifiedNext;
2491 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2492 {
2493 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2494 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2495 }
2496 pPool->cModifiedPages--;
2497 }
2498 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2499 {
2500 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2501 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2502 {
2503 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2504 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2505 }
2506 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2507 pPool->cModifiedPages--;
2508 }
2509 else
2510 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2511 pPage->cModifications = 0;
2512}
2513
2514
2515/**
2516 * Zaps the list of modified pages, resetting their modification counters in the process.
2517 *
2518 * @param pVM The VM handle.
2519 */
2520static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2521{
2522 pgmLock(pVM);
2523 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2524 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2525
2526 unsigned cPages = 0; NOREF(cPages);
2527
2528#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2529 pgmPoolResetDirtyPages(pVM);
2530#endif
2531
2532 uint16_t idx = pPool->iModifiedHead;
2533 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
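    /* Detach the whole list, then clear each page's modified-list links and modification counter. */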
2534 while (idx != NIL_PGMPOOL_IDX)
2535 {
2536 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2537 idx = pPage->iModifiedNext;
2538 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2539 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2540 pPage->cModifications = 0;
2541 Assert(++cPages);
2542 }
2543 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2544 pPool->cModifiedPages = 0;
2545 pgmUnlock(pVM);
2546}
2547
2548
2549/**
2550 * Handles SyncCR3 pool tasks.
2551 *
2552 * @returns VBox status code.
2553 * @retval VINF_SUCCESS if successfully handled.
2554 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2555 * @param pVCpu The VMCPU handle.
2556 * @remark Should only be used when monitoring is available, thus placed in
2557 * the PGMPOOL_WITH_MONITORING #ifdef.
2558 */
2559int pgmPoolSyncCR3(PVMCPU pVCpu)
2560{
2561 PVM pVM = pVCpu->CTX_SUFF(pVM);
2562 LogFlow(("pgmPoolSyncCR3\n"));
2563
2564 /*
2565 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2566 * Occasionally we will have to clear all the shadow page tables because we wanted
2567 * to monitor a page which was mapped by too many shadowed page tables. This operation
2568 * is sometimes referred to as a 'lightweight flush'.
2569 */
2570# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2571 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2572 pgmR3PoolClearAll(pVM);
2573# else /* !IN_RING3 */
2574 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2575 {
2576 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2577 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2578
2579 /* Make sure all other VCPUs return to ring 3. */
2580 if (pVM->cCpus > 1)
2581 {
2582 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2583 PGM_INVL_ALL_VCPU_TLBS(pVM);
2584 }
2585 return VINF_PGM_SYNC_CR3;
2586 }
2587# endif /* !IN_RING3 */
2588 else
2589 {
2590 pgmPoolMonitorModifiedClearAll(pVM);
2591
2592 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2593 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2594 {
2595 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2596 return pgmPoolSyncCR3(pVCpu);
2597 }
2598 }
2599 return VINF_SUCCESS;
2600}
2601
2602
2603/**
2604 * Frees up at least one user entry.
2605 *
2606 * @returns VBox status code.
2607 * @retval VINF_SUCCESS if successfully freed.
2608 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2609 * @param pPool The pool.
2610 * @param iUser The user index.
2611 */
2612static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2613{
2614 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2615 /*
2616 * Just free cached pages in a braindead fashion.
2617 */
2618 /** @todo walk the age list backwards and free the first with usage. */
2619 int rc = VINF_SUCCESS;
2620 do
2621 {
2622 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2623 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2624 rc = rc2;
2625 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2626 return rc;
2627}
2628
2629
2630/**
2631 * Inserts a page into the cache.
2632 *
2633 * This will create a user node for the page, insert it into the GCPhys
2634 * hash, and insert it into the age list.
2635 *
2636 * @returns VBox status code.
2637 * @retval VINF_SUCCESS if successfully added.
2638 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2639 * @param pPool The pool.
2640 * @param pPage The cached page.
2641 * @param GCPhys The GC physical address of the page we're gonna shadow.
2642 * @param iUser The user index.
2643 * @param iUserTable The user table index.
2644 */
2645DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2646{
2647 int rc = VINF_SUCCESS;
2648 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2649
2650 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2651
2652#ifdef VBOX_STRICT
2653 /*
2654 * Check that the entry doesn't already exist.
2655 */
2656 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2657 {
2658 uint16_t i = pPage->iUserHead;
2659 do
2660 {
2661 Assert(i < pPool->cMaxUsers);
2662 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2663 i = paUsers[i].iNext;
2664 } while (i != NIL_PGMPOOL_USER_INDEX);
2665 }
2666#endif
2667
2668 /*
2669 * Find a free user node.
2670 */
2671 uint16_t i = pPool->iUserFreeHead;
2672 if (i == NIL_PGMPOOL_USER_INDEX)
2673 {
2674 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2675 if (RT_FAILURE(rc))
2676 return rc;
2677 i = pPool->iUserFreeHead;
2678 }
2679
2680 /*
2681 * Unlink the user node from the free list,
2682 * initialize and insert it into the user list.
2683 */
2684 pPool->iUserFreeHead = paUsers[i].iNext;
2685 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2686 paUsers[i].iUser = iUser;
2687 paUsers[i].iUserTable = iUserTable;
2688 pPage->iUserHead = i;
2689
2690 /*
2691 * Insert into cache and enable monitoring of the guest page if enabled.
2692 *
2693 * Until we implement caching of all levels, including the CR3 one, we'll
2694 * have to make sure we don't try monitor & cache any recursive reuse of
2695 * a monitored CR3 page. Because all windows versions are doing this we'll
2696 * have to be able to do combined access monitoring, CR3 + PT and
2697 * PD + PT (guest PAE).
2698 *
2699 * Update:
2700 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2701 */
2702 const bool fCanBeMonitored = true;
2703 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2704 if (fCanBeMonitored)
2705 {
2706 rc = pgmPoolMonitorInsert(pPool, pPage);
2707 AssertRC(rc);
2708 }
2709 return rc;
2710}
2711
2712
2713/**
2714 * Adds a user reference to a page.
2715 *
2716 * This will move the page to the head of the age list.
2717 *
2718 * @returns VBox status code.
2719 * @retval VINF_SUCCESS if successfully added.
2720 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2721 * @param pPool The pool.
2722 * @param pPage The cached page.
2723 * @param iUser The user index.
2724 * @param iUserTable The user table.
2725 */
2726static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2727{
2728 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2729
2730 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2731
2732# ifdef VBOX_STRICT
2733 /*
2734 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2735 */
2736 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2737 {
2738 uint16_t i = pPage->iUserHead;
2739 do
2740 {
2741 Assert(i < pPool->cMaxUsers);
2742            AssertMsg(iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3 ||
2743 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2744 i = paUsers[i].iNext;
2745 } while (i != NIL_PGMPOOL_USER_INDEX);
2746 }
2747# endif
2748
2749 /*
2750 * Allocate a user node.
2751 */
2752 uint16_t i = pPool->iUserFreeHead;
2753 if (i == NIL_PGMPOOL_USER_INDEX)
2754 {
2755 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2756 if (RT_FAILURE(rc))
2757 return rc;
2758 i = pPool->iUserFreeHead;
2759 }
2760 pPool->iUserFreeHead = paUsers[i].iNext;
2761
2762 /*
2763 * Initialize the user node and insert it.
2764 */
2765 paUsers[i].iNext = pPage->iUserHead;
2766 paUsers[i].iUser = iUser;
2767 paUsers[i].iUserTable = iUserTable;
2768 pPage->iUserHead = i;
2769
2770# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2771 if (pPage->fDirty)
2772 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2773# endif
2774
2775 /*
2776 * Tell the cache to update its replacement stats for this page.
2777 */
2778 pgmPoolCacheUsed(pPool, pPage);
2779 return VINF_SUCCESS;
2780}
2781
2782
2783/**
2784 * Frees a user record associated with a page.
2785 *
2786 * This does not clear the entry in the user table, it simply returns the
2787 * user record to the chain of free records.
2788 *
2789 * @param pPool The pool.
2790 * @param pPage The shadow page.
2791 * @param iUser The shadow page pool index of the user table.
2792 * @param iUserTable The index into the user table (shadowed).
2793 */
2794static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2795{
2796 /*
2797 * Unlink and free the specified user entry.
2798 */
2799 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2800
2801 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2802 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2803 uint16_t i = pPage->iUserHead;
2804 if ( i != NIL_PGMPOOL_USER_INDEX
2805 && paUsers[i].iUser == iUser
2806 && paUsers[i].iUserTable == iUserTable)
2807 {
2808 pPage->iUserHead = paUsers[i].iNext;
2809
2810 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2811 paUsers[i].iNext = pPool->iUserFreeHead;
2812 pPool->iUserFreeHead = i;
2813 return;
2814 }
2815
2816 /* General: Linear search. */
2817 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2818 while (i != NIL_PGMPOOL_USER_INDEX)
2819 {
2820 if ( paUsers[i].iUser == iUser
2821 && paUsers[i].iUserTable == iUserTable)
2822 {
2823 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2824 paUsers[iPrev].iNext = paUsers[i].iNext;
2825 else
2826 pPage->iUserHead = paUsers[i].iNext;
2827
2828 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2829 paUsers[i].iNext = pPool->iUserFreeHead;
2830 pPool->iUserFreeHead = i;
2831 return;
2832 }
2833 iPrev = i;
2834 i = paUsers[i].iNext;
2835 }
2836
2837 /* Fatal: didn't find it */
2838 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2839 iUser, iUserTable, pPage->GCPhys));
2840}
2841
2842
2843/**
2844 * Gets the entry size of a shadow table.
2845 *
2846 * @param enmKind The kind of page.
2847 *
2848 * @returns The size of the entry in bytes. That is, 4 or 8.
2849 * @returns If the kind is not for a table, an assertion is raised and 0 is
2850 * returned.
2851 */
2852DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2853{
2854 switch (enmKind)
2855 {
2856 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2857 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2858 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2859 case PGMPOOLKIND_32BIT_PD:
2860 case PGMPOOLKIND_32BIT_PD_PHYS:
2861 return 4;
2862
2863 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2864 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2865 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2866 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2867 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2868 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2869 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2870 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2871 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2872 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2873 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2874 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2875 case PGMPOOLKIND_64BIT_PML4:
2876 case PGMPOOLKIND_PAE_PDPT:
2877 case PGMPOOLKIND_ROOT_NESTED:
2878 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2879 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2880 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2881 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2882 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2883 case PGMPOOLKIND_PAE_PD_PHYS:
2884 case PGMPOOLKIND_PAE_PDPT_PHYS:
2885 return 8;
2886
2887 default:
2888 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2889 }
2890}
2891
2892
2893/**
2894 * Gets the entry size of a guest table.
2895 *
2896 * @param enmKind The kind of page.
2897 *
2898 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2899 * @returns If the kind is not for a table, an assertion is raised and 0 is
2900 * returned.
2901 */
2902DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2903{
2904 switch (enmKind)
2905 {
2906 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2907 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2908 case PGMPOOLKIND_32BIT_PD:
2909 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2910 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2911 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2912 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2913 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2914 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2915 return 4;
2916
2917 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2918 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2919 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2920 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2921 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2922 case PGMPOOLKIND_64BIT_PML4:
2923 case PGMPOOLKIND_PAE_PDPT:
2924 return 8;
2925
2926 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2927 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2928 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2929 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2930 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2931 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2932 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2933 case PGMPOOLKIND_ROOT_NESTED:
2934 case PGMPOOLKIND_PAE_PD_PHYS:
2935 case PGMPOOLKIND_PAE_PDPT_PHYS:
2936 case PGMPOOLKIND_32BIT_PD_PHYS:
2937 /** @todo can we return 0? (nobody is calling this...) */
2938 AssertFailed();
2939 return 0;
2940
2941 default:
2942 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2943 }
2944}
2945
2946
2947/**
2948 * Scans one shadow page table for mappings of a physical page.
2949 *
2950 * @returns true if relevant PTEs were kept (only their write access was adjusted), false if they were removed.
2951 * @param pVM The VM handle.
2952 * @param pPhysPage The guest page in question.
2953 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
2954 * @param iShw The shadow page table.
2955 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
2956 * @param cRefs The number of references made in that PT.
2957 */
2958static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte, uint16_t cRefs)
2959{
2960 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d iPte=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte, cRefs));
2961 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2962 bool bRet = false;
2963
2964 /*
2965 * Assert sanity.
2966 */
2967 Assert(cRefs == 1);
2968 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
2969 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2970 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2971
2972 /*
2973 * Then, clear the actual mappings to the page in the shadow PT.
2974 */
2975 switch (pPage->enmKind)
2976 {
2977 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2978 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2979 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2980 {
2981 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2982 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2983 uint32_t u32AndMask, u32OrMask;
2984
2985 u32AndMask = 0;
2986 u32OrMask = 0;
2987
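            /* Decide whether to zero the PTE or only adjust its write access, based on the physical handler state. */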
2988 if (!fFlushPTEs)
2989 {
2990 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
2991 {
2992 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
2993 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
2994 u32OrMask = X86_PTE_RW;
2995 u32AndMask = UINT32_MAX;
2996 bRet = true;
2997 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2998 break;
2999
3000 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3001 u32OrMask = 0;
3002 u32AndMask = ~X86_PTE_RW;
3003 bRet = true;
3004 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3005 break;
3006 default:
3007 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3008 break;
3009 }
3010 }
3011 else
3012 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3013
3014 /* Update the counter if we're removing references. */
3015 if (!u32AndMask)
3016 {
3017 Assert(pPage->cPresent >= cRefs);
3018 Assert(pPool->cPresent >= cRefs);
3019 pPage->cPresent -= cRefs;
3020 pPool->cPresent -= cRefs;
3021 }
3022
3023 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3024 {
3025 X86PTE Pte;
3026
3027 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", iPte, pPT->a[iPte], cRefs));
3028 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3029 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3030 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3031
3032 ASMAtomicWriteSize(&pPT->a[iPte].u, Pte.u);
3033 return bRet;
3034 }
3035#ifdef LOG_ENABLED
3036 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3037 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3038 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3039 {
3040 Log(("i=%d cRefs=%d\n", i, cRefs--));
3041 }
3042#endif
3043 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3044 break;
3045 }
3046
3047 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3048 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3049 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3050 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3051 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3052 {
3053 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3054 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3055 uint64_t u64AndMask, u64OrMask;
3056
3057 u64OrMask = 0;
3058 u64AndMask = 0;
3059 if (!fFlushPTEs)
3060 {
3061 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3062 {
3063 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3064 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3065 u64OrMask = X86_PTE_RW;
3066 u64AndMask = UINT64_MAX;
3067 bRet = true;
3068 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3069 break;
3070
3071 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3072 u64OrMask = 0;
3073 u64AndMask = ~((uint64_t)X86_PTE_RW);
3074 bRet = true;
3075 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3076 break;
3077
3078 default:
3079 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3080 break;
3081 }
3082 }
3083 else
3084 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3085
3086 /* Update the counter if we're removing references. */
3087 if (!u64AndMask)
3088 {
3089 Assert(pPage->cPresent >= cRefs);
3090 Assert(pPool->cPresent >= cRefs);
3091 pPage->cPresent -= cRefs;
3092 pPool->cPresent -= cRefs;
3093 }
3094
3095 if ((pPT->a[iPte].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3096 {
3097 X86PTEPAE Pte;
3098
3099 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", iPte, pPT->a[iPte], cRefs));
3100 Pte.u = (pPT->a[iPte].u & u64AndMask) | u64OrMask;
3101 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3102 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3103
3104 ASMAtomicWriteSize(&pPT->a[iPte].u, Pte.u);
3105 return bRet;
3106 }
3107#ifdef LOG_ENABLED
3108 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3109 Log(("Found %RX64 expected %RX64\n", pPT->a[iPte].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P), u64));
3110 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3111 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3112 {
3113 Log(("i=%d cRefs=%d\n", i, cRefs--));
3114 }
3115#endif
3116 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind));
3117 break;
3118 }
3119
3120 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3121 {
3122 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3123 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3124
3125 if ((pPT->a[iPte].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3126 {
3127 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", iPte, pPT->a[iPte], cRefs));
3128 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3129 pPT->a[iPte].u = 0;
3130
3131 /* Update the counter as we're removing references. */
3132 Assert(pPage->cPresent);
3133 Assert(pPool->cPresent);
3134 pPage->cPresent--;
3135 pPool->cPresent--;
3136 return bRet;
3137 }
3138#ifdef LOG_ENABLED
3139 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3140 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3141 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3142 {
3143 Log(("i=%d cRefs=%d\n", i, cRefs--));
3144 }
3145#endif
3146 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3147 break;
3148 }
3149
3150#ifdef PGM_WITH_LARGE_PAGES
3151 /* Large page case only. */
3152 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3153 {
3154 Assert(HWACCMIsNestedPagingActive(pVM));
3155
3156 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3157 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3158
3159 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3160 {
3161 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", iPte, pPD->a[iPte], cRefs));
3162 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3163 pPD->a[iPte].u = 0;
3164
3165 /* Update the counter as we're removing references. */
3166 Assert(pPage->cPresent);
3167 Assert(pPool->cPresent);
3168 pPage->cPresent--;
3169 pPool->cPresent--;
3170
3171 return bRet;
3172 }
3173# ifdef LOG_ENABLED
3174 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3175 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3176 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3177 {
3178 Log(("i=%d cRefs=%d\n", i, cRefs--));
3179 }
3180# endif
3181 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3182 break;
3183 }
3184
3185 /* AMD-V nested paging - @todo merge with EPT as we only check the parts that are identical. */
3186 case PGMPOOLKIND_PAE_PD_PHYS:
3187 {
3188 Assert(HWACCMIsNestedPagingActive(pVM));
3189
3190 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3191 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3192
3193 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3194 {
3195 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", iPte, pPD->a[iPte], cRefs));
3196 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3197 pPD->a[iPte].u = 0;
3198
3199 /* Update the counter as we're removing references. */
3200 Assert(pPage->cPresent);
3201 Assert(pPool->cPresent);
3202 pPage->cPresent--;
3203 pPool->cPresent--;
3204 return bRet;
3205 }
3206# ifdef LOG_ENABLED
3207 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3208 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3209 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3210 {
3211 Log(("i=%d cRefs=%d\n", i, cRefs--));
3212 }
3213# endif
3214 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3215 break;
3216 }
3217#endif /* PGM_WITH_LARGE_PAGES */
3218
3219 default:
3220 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3221 }
3222 return bRet;
3223}
3224
3225
3226/**
3227 * Scans one shadow page table for mappings of a physical page.
3228 *
3229 * @param pVM The VM handle.
3230 * @param pPhysPage The guest page in question.
3231 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3232 * @param iShw The shadow page table.
3233 * @param cRefs The number of references made in that PT.
3234 */
3235static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
3236{
3237 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3238
3239    /* We should only come here when there's only one reference to this physical page. */
3240 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3241 Assert(cRefs == 1);
3242
3243 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
3244 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3245 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage), cRefs);
3246 if (!fKeptPTEs)
3247 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3248 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3249}
3250
3251
3252/**
3253 * Flushes a list of shadow page tables mapping the same physical page.
3254 *
3255 * @param pVM The VM handle.
3256 * @param pPhysPage The guest page in question.
3257 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3258 * @param iPhysExt The physical cross reference extent list to flush.
3259 */
3260static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3261{
3262 Assert(PGMIsLockOwner(pVM));
3263 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3264 bool fKeepList = false;
3265
3266 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3267    Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3268
3269 const uint16_t iPhysExtStart = iPhysExt;
3270 PPGMPOOLPHYSEXT pPhysExt;
3271 do
3272 {
3273 Assert(iPhysExt < pPool->cMaxPhysExts);
3274 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3275 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3276 {
3277 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3278 {
3279 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i], 1);
3280 if (!fKeptPTEs)
3281 {
3282 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3283 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3284 }
3285 else
3286 fKeepList = true;
3287 }
3288 }
3289 /* next */
3290 iPhysExt = pPhysExt->iNext;
3291 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3292
3293 if (!fKeepList)
3294 {
3295 /* insert the list into the free list and clear the ram range entry. */
3296 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3297 pPool->iPhysExtFreeHead = iPhysExtStart;
3298 /* Invalidate the tracking data. */
3299 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3300 }
3301
3302 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3303}
3304
3305
3306/**
3307 * Flushes all shadow page table mappings of the given guest page.
3308 *
3309 * This is typically called when the host page backing the guest one has been
3310 * replaced or when the page protection was changed due to an access handler.
3311 *
3312 * @returns VBox status code.
3313 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3314 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3315 * pool cleaning. FF and sync flags are set.
3316 *
3317 * @param pVM The VM handle.
3318 * @param GCPhysPage GC physical address of the page in question
3319 * @param pPhysPage The guest page in question.
3320 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3321 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3322 * flushed, it is NOT touched if this isn't necessary.
3323 *                      The caller MUST initialize this to @a false.
3324 */
3325int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3326{
3327 PVMCPU pVCpu = VMMGetCpu(pVM);
3328 pgmLock(pVM);
3329 int rc = VINF_SUCCESS;
3330
3331#ifdef PGM_WITH_LARGE_PAGES
3332 /* Is this page part of a large page? */
3333 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3334 {
3335 PPGMPAGE pPhysBase;
3336 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3337
3338 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3339
3340 /* Fetch the large page base. */
3341 if (GCPhysBase != GCPhysPage)
3342 {
3343 pPhysBase = pgmPhysGetPage(&pVM->pgm.s, GCPhysBase);
3344 AssertFatal(pPhysBase);
3345 }
3346 else
3347 pPhysBase = pPhysPage;
3348
3349 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3350
3351 if (PGM_PAGE_GET_PDE_TYPE(pPhysBase) == PGM_PAGE_PDE_TYPE_PDE)
3352 {
3353 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3354 PGM_PAGE_SET_PDE_TYPE(pPhysBase, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3355
3356            /* Update the base as *only* that one has a reference and there's only one PDE to clear. */
3357 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pPhysBase, fFlushPTEs, pfFlushTLBs);
3358
3359 *pfFlushTLBs = true;
3360 pgmUnlock(pVM);
3361 return rc;
3362 }
3363 }
3364#else
3365 NOREF(GCPhysPage);
3366#endif /* PGM_WITH_LARGE_PAGES */
3367
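    /* The tracking word packs a small reference count and an index: with a single
       reference the index points straight at the owning shadow page table, while a
       count of PGMPOOL_TD_CREFS_PHYSEXT means the index is the head of a physical
       cross reference extent list (or PGMPOOL_TD_IDX_OVERFLOWED once even that ran out). */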
3368 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3369 if (u16)
3370 {
3371 /*
3372 * The zero page is currently screwing up the tracking and we'll
3373 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3374 * is defined, zero pages won't normally be mapped. Some kind of solution
3375 * will be needed for this problem of course, but it will have to wait...
3376 */
3377 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3378 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3379 rc = VINF_PGM_GCPHYS_ALIASED;
3380 else
3381 {
3382# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3383 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3384 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3385 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3386# endif
3387
3388 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3389 pgmPoolTrackFlushGCPhysPT(pVM,
3390 pPhysPage,
3391 fFlushPTEs,
3392 PGMPOOL_TD_GET_IDX(u16),
3393 PGMPOOL_TD_GET_CREFS(u16));
3394 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3395 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3396 else
3397 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3398 *pfFlushTLBs = true;
3399
3400# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3401 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3402# endif
3403 }
3404 }
3405
3406 if (rc == VINF_PGM_GCPHYS_ALIASED)
3407 {
3408 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3409 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3410 rc = VINF_PGM_SYNC_CR3;
3411 }
3412 pgmUnlock(pVM);
3413 return rc;
3414}
3415
3416
3417/**
3418 * Scans all shadow page tables for mappings of a physical page.
3419 *
3420 * This may be slow, but it's most likely more efficient than cleaning
3421 * out the entire page pool / cache.
3422 *
3423 * @returns VBox status code.
3424 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3425 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3426 * a page pool cleaning.
3427 *
3428 * @param pVM The VM handle.
3429 * @param pPhysPage The guest page in question.
3430 */
3431int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3432{
3433 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3434 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3435 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3436 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3437
3438#if 1
3439 /*
3440 * There is a limit to what makes sense.
3441 */
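    /* Bailing out with VINF_PGM_GCPHYS_ALIASED makes the caller fall back to a full
       pool clear + CR3 resync, which is presumably cheaper than scanning this many
       present entries one by one. */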
3442 if (pPool->cPresent > 1024)
3443 {
3444 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3445 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3446 return VINF_PGM_GCPHYS_ALIASED;
3447 }
3448#endif
3449
3450 /*
3451     * Iterate all the pages until we've encountered all that are in use.
3452     * This is a simple but not quite optimal solution.
3453 */
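    /* u64 is the shadow PTE value we are looking for (host physical address of the
       page plus the present bit); u32 is the 32-bit truncation used when scanning
       legacy 32-bit page tables, whose entries only hold 32-bit values. */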
3454 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3455 const uint32_t u32 = u64;
3456 unsigned cLeft = pPool->cUsedPages;
3457 unsigned iPage = pPool->cCurPages;
3458 while (--iPage >= PGMPOOL_IDX_FIRST)
3459 {
3460 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3461 if ( pPage->GCPhys != NIL_RTGCPHYS
3462 && pPage->cPresent)
3463 {
3464 switch (pPage->enmKind)
3465 {
3466 /*
3467 * We only care about shadow page tables.
3468 */
3469 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3470 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3471 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3472 {
3473 unsigned cPresent = pPage->cPresent;
3474 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3475 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3476 if (pPT->a[i].n.u1Present)
3477 {
3478 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3479 {
3480 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3481 pPT->a[i].u = 0;
3482
3483 /* Update the counter as we're removing references. */
3484 Assert(pPage->cPresent);
3485 Assert(pPool->cPresent);
3486 pPage->cPresent--;
3487 pPool->cPresent--;
3488 }
3489 if (!--cPresent)
3490 break;
3491 }
3492 break;
3493 }
3494
3495 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3496 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3497 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3498 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3499 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3500 {
3501 unsigned cPresent = pPage->cPresent;
3502 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3503 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3504 if (pPT->a[i].n.u1Present)
3505 {
3506 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3507 {
3508 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3509 pPT->a[i].u = 0;
3510
3511 /* Update the counter as we're removing references. */
3512 Assert(pPage->cPresent);
3513 Assert(pPool->cPresent);
3514 pPage->cPresent--;
3515 pPool->cPresent--;
3516 }
3517 if (!--cPresent)
3518 break;
3519 }
3520 break;
3521 }
3522#ifndef IN_RC
3523 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3524 {
3525 unsigned cPresent = pPage->cPresent;
3526 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3527 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3528 if (pPT->a[i].n.u1Present)
3529 {
3530 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3531 {
3532 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3533 pPT->a[i].u = 0;
3534
3535 /* Update the counter as we're removing references. */
3536 Assert(pPage->cPresent);
3537 Assert(pPool->cPresent);
3538 pPage->cPresent--;
3539 pPool->cPresent--;
3540 }
3541 if (!--cPresent)
3542 break;
3543 }
3544 break;
3545 }
3546#endif
3547 }
3548 if (!--cLeft)
3549 break;
3550 }
3551 }
3552
3553 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3554 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3555 return VINF_SUCCESS;
3556}
3557
3558
3559/**
3560 * Clears the user entry in a user table.
3561 *
3562 * This is used to remove all references to a page when flushing it.
3563 */
3564static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3565{
3566 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3567 Assert(pUser->iUser < pPool->cCurPages);
3568 uint32_t iUserTable = pUser->iUserTable;
3569
3570 /*
3571 * Map the user page.
3572 */
3573 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
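    /* The union below lets us clear the user entry with either a 32-bit or a 64-bit
       write, depending on the kind of user table handled in the switch further down. */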
3574 union
3575 {
3576 uint64_t *pau64;
3577 uint32_t *pau32;
3578 } u;
3579 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3580
3581 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3582
3583 /* Safety precaution in case we change the paging for other modes too in the future. */
3584 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3585
3586#ifdef VBOX_STRICT
3587 /*
3588 * Some sanity checks.
3589 */
3590 switch (pUserPage->enmKind)
3591 {
3592 case PGMPOOLKIND_32BIT_PD:
3593 case PGMPOOLKIND_32BIT_PD_PHYS:
3594 Assert(iUserTable < X86_PG_ENTRIES);
3595 break;
3596 case PGMPOOLKIND_PAE_PDPT:
3597 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3598 case PGMPOOLKIND_PAE_PDPT_PHYS:
3599 Assert(iUserTable < 4);
3600 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3601 break;
3602 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3603 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3604 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3605 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3606 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3607 case PGMPOOLKIND_PAE_PD_PHYS:
3608 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3609 break;
3610 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3611 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3612 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3613 break;
3614 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3615 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3616 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3617 break;
3618 case PGMPOOLKIND_64BIT_PML4:
3619 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3620 /* GCPhys >> PAGE_SHIFT is the index here */
3621 break;
3622 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3623 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3624 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3625 break;
3626
3627 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3628 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3629 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3630 break;
3631
3632 case PGMPOOLKIND_ROOT_NESTED:
3633 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3634 break;
3635
3636 default:
3637 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3638 break;
3639 }
3640#endif /* VBOX_STRICT */
3641
3642 /*
3643 * Clear the entry in the user page.
3644 */
3645 switch (pUserPage->enmKind)
3646 {
3647 /* 32-bit entries */
3648 case PGMPOOLKIND_32BIT_PD:
3649 case PGMPOOLKIND_32BIT_PD_PHYS:
3650 ASMAtomicWriteSize(&u.pau32[iUserTable], 0);
3651 break;
3652
3653 /* 64-bit entries */
3654 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3655 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3656 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3657 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3658 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3659#if defined(IN_RC)
3660    /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during cr3 load, so any
3661 * non-present PDPT will continue to cause page faults.
3662 */
3663 ASMReloadCR3();
3664#endif
3665 /* no break */
3666 case PGMPOOLKIND_PAE_PD_PHYS:
3667 case PGMPOOLKIND_PAE_PDPT_PHYS:
3668 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3669 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3670 case PGMPOOLKIND_64BIT_PML4:
3671 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3672 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3673 case PGMPOOLKIND_PAE_PDPT:
3674 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3675 case PGMPOOLKIND_ROOT_NESTED:
3676 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3677 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3678 ASMAtomicWriteSize(&u.pau64[iUserTable], 0);
3679 break;
3680
3681 default:
3682 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3683 }
3684}
3685
3686
3687/**
3688 * Clears all users of a page.
3689 */
3690static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3691{
3692 /*
3693 * Free all the user records.
3694 */
3695 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3696
3697 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3698 uint16_t i = pPage->iUserHead;
3699 while (i != NIL_PGMPOOL_USER_INDEX)
3700 {
3701        /* Clear the entry in the user table. */
3702 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3703
3704 /* Free it. */
3705 const uint16_t iNext = paUsers[i].iNext;
3706 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3707 paUsers[i].iNext = pPool->iUserFreeHead;
3708 pPool->iUserFreeHead = i;
3709
3710 /* Next. */
3711 i = iNext;
3712 }
3713 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3714}
3715
3716
3717/**
3718 * Allocates a new physical cross reference extent.
3719 *
3720 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3721 * @param pVM The VM handle.
3722 * @param piPhysExt Where to store the phys ext index.
3723 */
3724PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3725{
3726 Assert(PGMIsLockOwner(pVM));
3727 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3728 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3729 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3730 {
3731 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3732 return NULL;
3733 }
3734 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3735 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3736 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3737 *piPhysExt = iPhysExt;
3738 return pPhysExt;
3739}
3740
3741
3742/**
3743 * Frees a physical cross reference extent.
3744 *
3745 * @param pVM The VM handle.
3746 * @param iPhysExt The extent to free.
3747 */
3748void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3749{
3750 Assert(PGMIsLockOwner(pVM));
3751 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3752 Assert(iPhysExt < pPool->cMaxPhysExts);
3753 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3754 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3755 {
3756 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3757 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3758 }
3759 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3760 pPool->iPhysExtFreeHead = iPhysExt;
3761}
3762
3763
3764/**
3765 * Frees a list of physical cross reference extents.
3766 *
3767 * @param   pVM         The VM handle.
3768 * @param   iPhysExt    The index of the first extent in the list to free.
3769 */
3770void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3771{
3772 Assert(PGMIsLockOwner(pVM));
3773 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3774
3775 const uint16_t iPhysExtStart = iPhysExt;
3776 PPGMPOOLPHYSEXT pPhysExt;
3777 do
3778 {
3779 Assert(iPhysExt < pPool->cMaxPhysExts);
3780 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3781 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3782 {
3783 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3784 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3785 }
3786
3787 /* next */
3788 iPhysExt = pPhysExt->iNext;
3789 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3790
3791 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3792 pPool->iPhysExtFreeHead = iPhysExtStart;
3793}
3794
3795
3796/**
3797 * Insert a reference into a list of physical cross reference extents.
3798 *
3799 * @returns The new tracking data for PGMPAGE.
3800 *
3801 * @param pVM The VM handle.
3802 * @param iPhysExt The physical extent index of the list head.
3803 * @param iShwPT The shadow page table index.
3804 * @param iPte Page table entry
3805 *
3806 */
3807static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3808{
3809 Assert(PGMIsLockOwner(pVM));
3810 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3811 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3812
3813 /* special common case. */
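    /* When the extent was created by pgmPoolTrackPhysExtAddref it filled slots 0 and 1,
       so slot 2 is typically the first free one; trying it first avoids walking the list. */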
3814 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3815 {
3816 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3817 paPhysExts[iPhysExt].apte[2] = iPte;
3818 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3819 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3820 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3821 }
3822
3823 /* general treatment. */
3824 const uint16_t iPhysExtStart = iPhysExt;
3825 unsigned cMax = 15;
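    /* cMax bounds the walk; if no free slot turns up within 15 extents the whole list is
       freed and the page is downgraded to the untracked 'overflowed' state below. */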
3826 for (;;)
3827 {
3828 Assert(iPhysExt < pPool->cMaxPhysExts);
3829 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3830 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3831 {
3832 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3833 paPhysExts[iPhysExt].apte[i] = iPte;
3834 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3835 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3836 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3837 }
3838 if (!--cMax)
3839 {
3840 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3841 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3842 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3843 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3844        }
            /* Advance to the next extent in the list; fall out of the loop when the end is
               reached so a fresh extent can be appended below (otherwise the append code
               below would be unreachable and the walk would respin on the same extent). */
            iPhysExt = paPhysExts[iPhysExt].iNext;
            if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
                break;
3845    }
3846
3847 /* add another extent to the list. */
3848 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3849 if (!pNew)
3850 {
3851 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackNoExtentsLeft);
3852 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3853 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3854 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3855 }
3856 pNew->iNext = iPhysExtStart;
3857 pNew->aidx[0] = iShwPT;
3858 pNew->apte[0] = iPte;
3859 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
3860 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3861}
3862
3863
3864/**
3865 * Add a reference to a guest physical page where extents are in use.
3866 *
3867 * @returns The new tracking data for PGMPAGE.
3868 *
3869 * @param pVM The VM handle.
3870 * @param pPhysPage Pointer to the aPages entry in the ram range.
3871 * @param u16 The ram range flags (top 16-bits).
3872 * @param iShwPT The shadow page table index.
3873 * @param iPte Page table entry
3874 */
3875uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
3876{
3877 pgmLock(pVM);
3878 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3879 {
3880 /*
3881 * Convert to extent list.
3882 */
3883 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3884 uint16_t iPhysExt;
3885 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3886 if (pPhysExt)
3887 {
3888 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3889 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3890 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3891 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
3892 pPhysExt->aidx[1] = iShwPT;
3893 pPhysExt->apte[1] = iPte;
3894 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3895 }
3896 else
3897 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3898 }
3899 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3900 {
3901 /*
3902 * Insert into the extent list.
3903 */
3904 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
3905 }
3906 else
3907 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3908 pgmUnlock(pVM);
3909 return u16;
3910}
3911
3912/**
3913 * Clear references to guest physical memory.
3914 *
3915 * @param pPool The pool.
3916 * @param pPage The page.
3917 * @param pPhysPage Pointer to the aPages entry in the ram range.
3918 * @param iPte Shadow PTE index
3919 */
3920void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
3921{
3922 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3923 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3924
3925 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3926 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3927 {
3928 PVM pVM = pPool->CTX_SUFF(pVM);
3929 pgmLock(pVM);
3930
3931 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3932 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3933 do
3934 {
3935 Assert(iPhysExt < pPool->cMaxPhysExts);
3936
3937 /*
3938 * Look for the shadow page and check if it's all freed.
3939 */
3940 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3941 {
3942 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
3943 && paPhysExts[iPhysExt].apte[i] == iPte)
3944 {
3945 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3946 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3947
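                    /* If any slot in this extent is still in use, keep the node (and the list) as is. */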
3948 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3949 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3950 {
3951 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3952 pgmUnlock(pVM);
3953 return;
3954 }
3955
3956 /* we can free the node. */
3957 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3958 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3959 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3960 {
3961 /* lonely node */
3962 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3963 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3964 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3965 }
3966 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3967 {
3968 /* head */
3969 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3970 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3971 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3972 }
3973 else
3974 {
3975 /* in list */
3976 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
3977 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3978 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3979 }
3980 iPhysExt = iPhysExtNext;
3981 pgmUnlock(pVM);
3982 return;
3983 }
3984 }
3985
3986 /* next */
3987 iPhysExtPrev = iPhysExt;
3988 iPhysExt = paPhysExts[iPhysExt].iNext;
3989 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3990
3991 pgmUnlock(pVM);
3992 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3993 }
3994 else /* nothing to do */
3995 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3996}
3997
3998/**
3999 * Clear references to guest physical memory.
4000 *
4001 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
4002 * is assumed to be correct, so the linear search can be skipped and we can assert
4003 * at an earlier point.
4004 *
4005 * @param pPool The pool.
4006 * @param pPage The page.
4007 * @param HCPhys The host physical address corresponding to the guest page.
4008 * @param GCPhys The guest physical address corresponding to HCPhys.
4009 * @param iPte Shadow PTE index
4010 */
4011static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4012{
4013 /*
4014 * Walk range list.
4015 */
4016 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4017 while (pRam)
4018 {
4019 RTGCPHYS off = GCPhys - pRam->GCPhys;
4020 if (off < pRam->cb)
4021 {
4022 /* does it match? */
4023 const unsigned iPage = off >> PAGE_SHIFT;
4024 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4025#ifdef LOG_ENABLED
4026 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
4027 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4028#endif
4029 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4030 {
4031 Assert(pPage->cPresent);
4032 Assert(pPool->cPresent);
4033 pPage->cPresent--;
4034 pPool->cPresent--;
4035 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4036 return;
4037 }
4038 break;
4039 }
4040 pRam = pRam->CTX_SUFF(pNext);
4041 }
4042 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4043}
4044
4045
4046/**
4047 * Clear references to guest physical memory.
4048 *
4049 * @param pPool The pool.
4050 * @param pPage The page.
4051 * @param HCPhys The host physical address corresponding to the guest page.
4052 * @param   GCPhysHint  The guest physical address which may correspond to HCPhys.
4053 * @param iPte Shadow pte index
4054 */
4055void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4056{
4057 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4058
4059 /*
4060 * Walk range list.
4061 */
4062 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4063 while (pRam)
4064 {
4065 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
4066 if (off < pRam->cb)
4067 {
4068 /* does it match? */
4069 const unsigned iPage = off >> PAGE_SHIFT;
4070 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4071 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4072 {
4073 Assert(pPage->cPresent);
4074 Assert(pPool->cPresent);
4075 pPage->cPresent--;
4076 pPool->cPresent--;
4077 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4078 return;
4079 }
4080 break;
4081 }
4082 pRam = pRam->CTX_SUFF(pNext);
4083 }
4084
4085 /*
4086 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4087 */
4088 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4089 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4090 while (pRam)
4091 {
4092 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4093 while (iPage-- > 0)
4094 {
4095 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4096 {
4097 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4098 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4099 Assert(pPage->cPresent);
4100 Assert(pPool->cPresent);
4101 pPage->cPresent--;
4102 pPool->cPresent--;
4103 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4104 return;
4105 }
4106 }
4107 pRam = pRam->CTX_SUFF(pNext);
4108 }
4109
4110 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
4111}
4112
4113
4114/**
4115 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4116 *
4117 * @param pPool The pool.
4118 * @param pPage The page.
4119 * @param pShwPT The shadow page table (mapping of the page).
4120 * @param pGstPT The guest page table.
4121 */
4122DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4123{
4124 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4125 if (pShwPT->a[i].n.u1Present)
4126 {
4127 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4128 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4129 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4130 if (!pPage->cPresent)
4131 break;
4132 }
4133}
4134
4135
4136/**
4137 * Clear references to guest physical memory in a PAE / 32-bit page table.
4138 *
4139 * @param pPool The pool.
4140 * @param pPage The page.
4141 * @param pShwPT The shadow page table (mapping of the page).
4142 * @param pGstPT The guest page table (just a half one).
4143 */
4144DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
4145{
4146 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4147 if (pShwPT->a[i].n.u1Present)
4148 {
4149 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4150 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4151 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4152 if (!pPage->cPresent)
4153 break;
4154 }
4155}
4156
4157
4158/**
4159 * Clear references to guest physical memory in a PAE / PAE page table.
4160 *
4161 * @param pPool The pool.
4162 * @param pPage The page.
4163 * @param pShwPT The shadow page table (mapping of the page).
4164 * @param pGstPT The guest page table.
4165 */
4166DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
4167{
4168 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4169 if (pShwPT->a[i].n.u1Present)
4170 {
4171            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4172 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4173 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
4174 if (!pPage->cPresent)
4175 break;
4176 }
4177}
4178
4179
4180/**
4181 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4182 *
4183 * @param pPool The pool.
4184 * @param pPage The page.
4185 * @param pShwPT The shadow page table (mapping of the page).
4186 */
4187DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4188{
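    /* There is no guest page table to consult for a 4 MB mapping, so the guest physical
       address is derived from the page's base GCPhys plus the PTE index. */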
4189 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4190 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4191 if (pShwPT->a[i].n.u1Present)
4192 {
4193 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4194 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4195 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys, i);
4196 if (!pPage->cPresent)
4197 break;
4198 }
4199}
4200
4201
4202/**
4203 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4204 *
4205 * @param pPool The pool.
4206 * @param pPage The page.
4207 * @param pShwPT The shadow page table (mapping of the page).
4208 */
4209DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
4210{
4211 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4212 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4213 if (pShwPT->a[i].n.u1Present)
4214 {
4215 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4216 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
4217 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys, i);
4218 if (!pPage->cPresent)
4219 break;
4220 }
4221}
4222
4223
4224/**
4225 * Clear references to shadowed pages in an EPT page table.
4226 *
4227 * @param pPool The pool.
4228 * @param pPage The page.
4229 * @param   pShwPT      The shadow page table (mapping of the page).
4230 */
4231DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4232{
4233 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4234 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4235 if (pShwPT->a[i].n.u1Present)
4236 {
4237 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4238 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4239 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys, i);
4240 if (!pPage->cPresent)
4241 break;
4242 }
4243}
4244
4245
4246
4247/**
4248 * Clear references to shadowed pages in a 32-bit page directory.
4249 *
4250 * @param pPool The pool.
4251 * @param pPage The page.
4252 * @param pShwPD The shadow page directory (mapping of the page).
4253 */
4254DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4255{
4256 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4257 {
4258 if ( pShwPD->a[i].n.u1Present
4259 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4260 )
4261 {
4262 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4263 if (pSubPage)
4264 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4265 else
4266 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4267 }
4268 }
4269}
4270
4271/**
4272 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4273 *
4274 * @param pPool The pool.
4275 * @param pPage The page.
4276 * @param pShwPD The shadow page directory (mapping of the page).
4277 */
4278DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4279{
4280 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4281 {
4282 if ( pShwPD->a[i].n.u1Present
4283 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4284 )
4285 {
4286#ifdef PGM_WITH_LARGE_PAGES
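            /* A large page PDE maps guest physical memory directly (there is no sub page
               table to free), so drop the reference on the physical page instead. */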
4287 if (pShwPD->a[i].b.u1Size)
4288 {
4289 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4290 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4291 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4292 }
4293 else
4294#endif
4295 {
4296 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4297 if (pSubPage)
4298 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4299 else
4300 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4301 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4302 }
4303 }
4304 }
4305}
4306
4307/**
4308 * Clear references to shadowed pages in a PAE page directory pointer table.
4309 *
4310 * @param pPool The pool.
4311 * @param pPage The page.
4312 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4313 */
4314DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4315{
4316 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4317 {
4318 if ( pShwPDPT->a[i].n.u1Present
4319 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4320 )
4321 {
4322 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4323 if (pSubPage)
4324 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4325 else
4326 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4327 }
4328 }
4329}
4330
4331
4332/**
4333 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4334 *
4335 * @param pPool The pool.
4336 * @param pPage The page.
4337 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4338 */
4339DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4340{
4341 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4342 {
4343 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4344 if (pShwPDPT->a[i].n.u1Present)
4345 {
4346 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4347 if (pSubPage)
4348 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4349 else
4350 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4351 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4352 }
4353 }
4354}
4355
4356
4357/**
4358 * Clear references to shadowed pages in a 64-bit level 4 page table.
4359 *
4360 * @param pPool The pool.
4361 * @param pPage The page.
4362 * @param   pShwPML4    The shadow PML4 table (mapping of the page).
4363 */
4364DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4365{
4366 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4367 {
4368 if (pShwPML4->a[i].n.u1Present)
4369 {
4370 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4371 if (pSubPage)
4372 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4373 else
4374 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4375 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4376 }
4377 }
4378}
4379
4380
4381/**
4382 * Clear references to shadowed pages in an EPT page directory.
4383 *
4384 * @param pPool The pool.
4385 * @param pPage The page.
4386 * @param pShwPD The shadow page directory (mapping of the page).
4387 */
4388DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4389{
4390 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4391 {
4392 if (pShwPD->a[i].n.u1Present)
4393 {
4394#ifdef PGM_WITH_LARGE_PAGES
4395 if (pShwPD->a[i].b.u1Size)
4396 {
4397 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4398 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4399 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4400 }
4401 else
4402#endif
4403 {
4404 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4405 if (pSubPage)
4406 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4407 else
4408 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4409 }
4410 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4411 }
4412 }
4413}
4414
4415
4416/**
4417 * Clear references to shadowed pages in an EPT page directory pointer table.
4418 *
4419 * @param pPool The pool.
4420 * @param pPage The page.
4421 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4422 */
4423DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4424{
4425 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4426 {
4427 if (pShwPDPT->a[i].n.u1Present)
4428 {
4429 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4430 if (pSubPage)
4431 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4432 else
4433 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4434 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4435 }
4436 }
4437}
4438
4439
4440/**
4441 * Clears all references made by this page.
4442 *
4443 * This includes other shadow pages and GC physical addresses.
4444 *
4445 * @param pPool The pool.
4446 * @param pPage The page.
4447 */
4448static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4449{
4450 /*
4451 * Map the shadow page and take action according to the page kind.
4452 */
4453 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
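    /* The locked mapping variant is used so the mapping (presumably) stays valid for the
       whole traversal; PGMPOOL_UNLOCK_PTR at the bottom releases it again. */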
4454 switch (pPage->enmKind)
4455 {
4456 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4457 {
4458 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4459 void *pvGst;
4460 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4461 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4462 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4463 break;
4464 }
4465
4466 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4467 {
4468 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4469 void *pvGst;
4470 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4471 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4472 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4473 break;
4474 }
4475
4476 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4477 {
4478 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4479 void *pvGst;
4480 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4481 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4482 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4483 break;
4484 }
4485
4486 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4487 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4488 {
4489 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4490 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4491 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4492 break;
4493 }
4494
4495 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4496 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4497 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4498 {
4499 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4500 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4501 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4502 break;
4503 }
4504
4505 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4506 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4507 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4508 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4509 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4510 case PGMPOOLKIND_PAE_PD_PHYS:
4511 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4512 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4513 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4514 break;
4515
4516 case PGMPOOLKIND_32BIT_PD_PHYS:
4517 case PGMPOOLKIND_32BIT_PD:
4518 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4519 break;
4520
4521 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4522 case PGMPOOLKIND_PAE_PDPT:
4523 case PGMPOOLKIND_PAE_PDPT_PHYS:
4524 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4525 break;
4526
4527 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4528 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4529 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4530 break;
4531
4532 case PGMPOOLKIND_64BIT_PML4:
4533 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4534 break;
4535
4536 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4537 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4538 break;
4539
4540 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4541 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4542 break;
4543
4544 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4545 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4546 break;
4547
4548 default:
4549 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4550 }
4551
4552    /* Paranoia: clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4553 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4554 ASMMemZeroPage(pvShw);
4555 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4556 pPage->fZeroed = true;
4557 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4558 Assert(!pPage->cPresent);
4559}
4560
4561/**
4562 * Flushes a pool page.
4563 *
4564 * This moves the page to the free list after removing all user references to it.
4565 *
4566 * @returns VBox status code.
4567 * @retval VINF_SUCCESS on success.
4568 * @param pPool The pool.
4569 * @param HCPhys The HC physical address of the shadow page.
4570 * @param   fFlush      Flush the TLBs when required (should only be false in very specific use cases!!)
4571 */
4572int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4573{
4574 PVM pVM = pPool->CTX_SUFF(pVM);
4575 bool fFlushRequired = false;
4576
4577 int rc = VINF_SUCCESS;
4578 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4579 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4580 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4581
4582 /*
4583 * Quietly reject any attempts at flushing any of the special root pages.
4584 */
4585 if (pPage->idx < PGMPOOL_IDX_FIRST)
4586 {
4587 AssertFailed(); /* can no longer happen */
4588 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4589 return VINF_SUCCESS;
4590 }
4591
4592 pgmLock(pVM);
4593
4594 /*
4595 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4596 */
4597 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4598 {
4599 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4600 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4601 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4602 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4603 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4604 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4605 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4606 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4607 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4608 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4609 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4610 pgmUnlock(pVM);
4611 return VINF_SUCCESS;
4612 }
4613
4614#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4615 /* Start a subset so we won't run out of mapping space. */
4616 PVMCPU pVCpu = VMMGetCpu(pVM);
4617 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4618#endif
4619
4620 /*
4621 * Mark the page as being in need of an ASMMemZeroPage().
4622 */
4623 pPage->fZeroed = false;
4624
4625#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4626 if (pPage->fDirty)
4627 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4628#endif
4629
4630 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4631 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4632 fFlushRequired = true;
4633
4634 /*
4635 * Clear the page.
4636 */
4637 pgmPoolTrackClearPageUsers(pPool, pPage);
4638 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4639 pgmPoolTrackDeref(pPool, pPage);
4640 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4641
4642 /*
4643 * Flush it from the cache.
4644 */
4645 pgmPoolCacheFlushPage(pPool, pPage);
4646
4647#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4648 /* Heavy stuff done. */
4649 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4650#endif
4651
4652 /*
4653     * Deregister the monitoring.
4654 */
4655 if (pPage->fMonitored)
4656 rc = pgmPoolMonitorFlush(pPool, pPage);
4657
4658 /*
4659 * Free the page.
4660 */
4661 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4662 pPage->iNext = pPool->iFreeHead;
4663 pPool->iFreeHead = pPage->idx;
4664 pPage->enmKind = PGMPOOLKIND_FREE;
4665 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4666 pPage->GCPhys = NIL_RTGCPHYS;
4667 pPage->fReusedFlushPending = false;
4668
4669 pPool->cUsedPages--;
4670
4671 /* Flush the TLBs of all VCPUs if required. */
4672 if ( fFlushRequired
4673 && fFlush)
4674 {
4675 PGM_INVL_ALL_VCPU_TLBS(pVM);
4676 }
4677
4678 pgmUnlock(pVM);
4679 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4680 return rc;
4681}
4682
4683
4684/**
4685 * Frees a usage of a pool page.
4686 *
4687 * The caller is responsible for updating the user table so that it no longer
4688 * references the shadow page.
4689 *
4690 * @param pPool The pool.
4691 * @param HCPhys The HC physical address of the shadow page.
4692 * @param iUser The shadow page pool index of the user table.
4693 * @param iUserTable The index into the user table (shadowed).
4694 */
4695void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4696{
4697 PVM pVM = pPool->CTX_SUFF(pVM);
4698
4699 STAM_PROFILE_START(&pPool->StatFree, a);
4700 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4701 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4702 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4703 pgmLock(pVM);
4704 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4705 if (!pPage->fCached)
4706 pgmPoolFlushPage(pPool, pPage);
4707 pgmUnlock(pVM);
4708 STAM_PROFILE_STOP(&pPool->StatFree, a);
4709}
4710
4711
4712/**
4713 * Makes one or more free pages available.
4714 *
4715 * @returns VBox status code.
4716 * @retval VINF_SUCCESS on success.
4717 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4718 *
4719 * @param pPool The pool.
4720 * @param enmKind Page table kind
4721 * @param iUser The user of the page.
4722 */
4723static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4724{
4725 PVM pVM = pPool->CTX_SUFF(pVM);
4726
4727 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4728
4729 /*
4730     * If the pool isn't fully grown yet, expand it.
4731 */
4732 if ( pPool->cCurPages < pPool->cMaxPages
4733#if defined(IN_RC)
4734 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4735 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4736 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4737#endif
4738 )
4739 {
4740 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4741#ifdef IN_RING3
4742 int rc = PGMR3PoolGrow(pVM);
4743#else
4744 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4745#endif
4746 if (RT_FAILURE(rc))
4747 return rc;
4748 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4749 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4750 return VINF_SUCCESS;
4751 }
4752
4753 /*
4754 * Free one cached page.
4755 */
4756 return pgmPoolCacheFreeOne(pPool, iUser);
4757}
4758
4759/**
4760 * Allocates a page from the pool.
4761 *
4762 * This page may actually be a cached page and not in need of any processing
4763 * on the callers part.
4764 *
4765 * @returns VBox status code.
4766 * @retval VINF_SUCCESS if a NEW page was allocated.
4767 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4768 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4769 * @param pVM The VM handle.
4770 * @param GCPhys The GC physical address of the page we're gonna shadow.
4771 * For 4MB and 2MB PD entries, it's the first address the
4772 * shadow PT is covering.
4773 * @param enmKind The kind of mapping.
4774 * @param enmAccess Access type for the mapping (only relevant for big pages)
4775 * @param iUser The shadow page pool index of the user table.
4776 * @param iUserTable The index into the user table (shadowed).
4777 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4778 * @param fLockPage Lock the page
4779 */
4780int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4781{
4782 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4783 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4784 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4785 *ppPage = NULL;
4786 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4787 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4788 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4789
4790 pgmLock(pVM);
4791
4792 if (pPool->fCacheEnabled)
4793 {
4794 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4795 if (RT_SUCCESS(rc2))
4796 {
4797 if (fLockPage)
4798 pgmPoolLockPage(pPool, *ppPage);
4799 pgmUnlock(pVM);
4800 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4801 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4802 return rc2;
4803 }
4804 }
4805
4806 /*
4807 * Allocate a new one.
4808 */
4809 int rc = VINF_SUCCESS;
4810 uint16_t iNew = pPool->iFreeHead;
4811 if (iNew == NIL_PGMPOOL_IDX)
4812 {
4813 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4814 if (RT_FAILURE(rc))
4815 {
4816 pgmUnlock(pVM);
4817 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4818 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4819 return rc;
4820 }
4821 iNew = pPool->iFreeHead;
4822 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4823 }
4824
4825 /* unlink the free head */
4826 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4827 pPool->iFreeHead = pPage->iNext;
4828 pPage->iNext = NIL_PGMPOOL_IDX;
4829
4830 /*
4831 * Initialize it.
4832 */
4833 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4834 pPage->enmKind = enmKind;
4835 pPage->enmAccess = enmAccess;
4836 pPage->GCPhys = GCPhys;
4837 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4838 pPage->fMonitored = false;
4839 pPage->fCached = false;
4840#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4841 pPage->fDirty = false;
4842#endif
4843 pPage->fReusedFlushPending = false;
4844 pPage->cModifications = 0;
4845 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4846 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4847 pPage->cLocked = 0;
4848 pPage->cPresent = 0;
4849 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4850 pPage->pvLastAccessHandlerFault = 0;
4851 pPage->cLastAccessHandlerCount = 0;
4852 pPage->pvLastAccessHandlerRip = 0;
4853
4854 /*
4855 * Insert into the tracking and cache. If this fails, free the page.
4856 */
4857 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4858 if (RT_FAILURE(rc3))
4859 {
4860 pPool->cUsedPages--;
4861 pPage->enmKind = PGMPOOLKIND_FREE;
4862 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4863 pPage->GCPhys = NIL_RTGCPHYS;
4864 pPage->iNext = pPool->iFreeHead;
4865 pPool->iFreeHead = pPage->idx;
4866 pgmUnlock(pVM);
4867 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4868 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4869 return rc3;
4870 }
4871
4872 /*
4873 * Commit the allocation, clear the page and return.
4874 */
4875#ifdef VBOX_WITH_STATISTICS
4876 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4877 pPool->cUsedPagesHigh = pPool->cUsedPages;
4878#endif
4879
4880 if (!pPage->fZeroed)
4881 {
4882 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4883 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4884 ASMMemZeroPage(pv);
4885 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4886 }
4887
4888 *ppPage = pPage;
4889 if (fLockPage)
4890 pgmPoolLockPage(pPool, pPage);
4891 pgmUnlock(pVM);
4892 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4893 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4894 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4895 return rc;
4896}
4897
4898
4899/**
4900 * Frees a usage of a pool page.
4901 *
4902 * @param pVM The VM handle.
4903 * @param HCPhys The HC physical address of the shadow page.
4904 * @param iUser The shadow page pool index of the user table.
4905 * @param iUserTable The index into the user table (shadowed).
4906 */
4907void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4908{
4909 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4910 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4911 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4912}
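
/*
 * "Freeing a usage" boils down to removing one (iUser, iUserTable) reference
 * from the page's chain of user records; whether the page itself then returns
 * to the cache or the free list is decided by pgmPoolFreeByPage, which is not
 * shown in this excerpt.  The compiled-out sketch below only illustrates the
 * chain removal, using hypothetical stand-in types (DEMOUSER, DEMO_NIL_USER).
 */
#if 0 /* illustrative sketch only */
# include <stdint.h>

# define DEMO_NIL_USER  UINT16_MAX  /* stand-in for NIL_PGMPOOL_USER_INDEX */

typedef struct DEMOUSER
{
    uint16_t iNext;         /* next user record of the same page */
    uint16_t iUser;         /* pool index of the page that references us */
    uint32_t iUserTable;    /* entry within that page */
} DEMOUSER;

/** Unlinks the record matching (iUser, iUserTable) from the chain starting at
 * *piUserHead and returns its index so the caller can put it back on the free
 * list of user records; returns DEMO_NIL_USER if no such record exists. */
static uint16_t demoRemoveUser(DEMOUSER *paUsers, uint16_t *piUserHead,
                               uint16_t iUser, uint32_t iUserTable)
{
    uint16_t *piPrev = piUserHead;
    for (uint16_t i = *piUserHead; i != DEMO_NIL_USER; i = paUsers[i].iNext)
    {
        if (   paUsers[i].iUser == iUser
            && paUsers[i].iUserTable == iUserTable)
        {
            *piPrev = paUsers[i].iNext;     /* unlink from the chain */
            paUsers[i].iNext = DEMO_NIL_USER;
            return i;
        }
        piPrev = &paUsers[i].iNext;
    }
    return DEMO_NIL_USER;
}
#endif /* illustrative sketch only */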
4913
4914/**
4915 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4916 *
4917 * @returns Pointer to the shadow page structure.
4918 * @param pPool The pool.
4919 * @param HCPhys The HC physical address of the shadow page.
4920 */
4921PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4922{
4923 PVM pVM = pPool->CTX_SUFF(pVM);
4924
4925 Assert(PGMIsLockOwner(pVM));
4926
4927 /*
4928 * Look up the page.
4929 */
4930 pgmLock(pVM);
4931 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4932 pgmUnlock(pVM);
4933
4934 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4935 return pPage;
4936}
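
/*
 * pgmPoolGetPage resolves a shadow page from its host-physical address by
 * masking the address down to the page frame (X86_PTE_PAE_PG_MASK) and
 * looking that key up in an AVL tree.  The compiled-out sketch below models
 * the same masked-key lookup with a sorted array and a binary search instead
 * of the AVL tree; DEMOENTRY, demoLookup and DEMO_PAGE_MASK are hypothetical
 * simplifications (the real mask also strips the high non-address bits).
 */
#if 0 /* illustrative sketch only */
# include <stdint.h>
# include <stddef.h>

# define DEMO_PAGE_SIZE 0x1000u
# define DEMO_PAGE_MASK (~(uint64_t)(DEMO_PAGE_SIZE - 1))

typedef struct DEMOENTRY
{
    uint64_t Key;       /* page-aligned host-physical address */
    void    *pvPage;    /* the shadow page record for that frame */
} DEMOENTRY;

/** Binary search over entries sorted by Key; the query is masked exactly the
 * way the keys were masked when they were inserted. */
static const DEMOENTRY *demoLookup(const DEMOENTRY *paEntries, size_t cEntries, uint64_t HCPhys)
{
    uint64_t const Key = HCPhys & DEMO_PAGE_MASK;
    size_t iLo = 0;
    size_t iHi = cEntries;
    while (iLo < iHi)
    {
        size_t iMid = iLo + (iHi - iLo) / 2;
        if (paEntries[iMid].Key == Key)
            return &paEntries[iMid];
        if (paEntries[iMid].Key < Key)
            iLo = iMid + 1;
        else
            iHi = iMid;
    }
    return NULL;    /* the real code asserts fatally when nothing is found */
}
#endif /* illustrative sketch only */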
4937
4938#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4939/**
4940 * Flushes the specified page if present.
4941 *
4942 * @param pVM The VM handle.
4943 * @param GCPhys Guest physical address of the page to flush.
4944 */
4945void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4946{
4947 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4948
4949 VM_ASSERT_EMT(pVM);
4950
4951 /*
4952 * Look up the GCPhys in the hash.
4953 */
4954 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4955 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4956 if (i == NIL_PGMPOOL_IDX)
4957 return;
4958
4959 do
4960 {
4961 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4962 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4963 {
4964 switch (pPage->enmKind)
4965 {
4966 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4967 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4968 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4969 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4970 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4971 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4972 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4973 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4974 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4975 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4976 case PGMPOOLKIND_64BIT_PML4:
4977 case PGMPOOLKIND_32BIT_PD:
4978 case PGMPOOLKIND_PAE_PDPT:
4979 {
4980 Log(("pgmPoolFlushPageByGCPhys: found pgm pool pages for %RGp\n", GCPhys));
4981#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4982 if (pPage->fDirty)
4983 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4984 else
4985#endif
4986 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4987 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4988 pgmPoolMonitorChainFlush(pPool, pPage);
4989 return;
4990 }
4991
4992 /* ignore, no monitoring. */
4993 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4994 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4995 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4996 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4997 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4998 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4999 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5000 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5001 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5002 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5003 case PGMPOOLKIND_ROOT_NESTED:
5004 case PGMPOOLKIND_PAE_PD_PHYS:
5005 case PGMPOOLKIND_PAE_PDPT_PHYS:
5006 case PGMPOOLKIND_32BIT_PD_PHYS:
5007 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5008 break;
5009
5010 default:
5011 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5012 }
5013 }
5014
5015 /* next */
5016 i = pPage->iNext;
5017 } while (i != NIL_PGMPOOL_IDX);
5018 return;
5019}
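
/*
 * The lookup above hashes the page-aligned guest-physical address into
 * pPool->aiHash and walks the bucket via the iNext links.  The containment
 * test "pPage->GCPhys - GCPhys < PAGE_SIZE" uses unsigned arithmetic: with
 * GCPhys rounded down to a page boundary, the difference is below PAGE_SIZE
 * exactly when pPage->GCPhys lies inside that guest page, and wraps to a huge
 * value otherwise.  Compiled-out sketch with hypothetical names below (the
 * real PGMPOOL_HASH differs from DEMO_HASH).
 */
#if 0 /* illustrative sketch only */
# include <stdint.h>
# include <stddef.h>

# define DEMO_PAGE_SIZE  0x1000u
# define DEMO_HASH_NIL   UINT16_MAX
# define DEMO_HASH_SIZE  16u
# define DEMO_HASH(a)    ((unsigned)((a) >> 12) % DEMO_HASH_SIZE)

typedef struct DEMOHASHPAGE
{
    uint64_t GCPhys;    /* guest-physical address this shadow page maps */
    uint16_t iNext;     /* next page in the same hash bucket */
} DEMOHASHPAGE;

typedef struct DEMOHASHPOOL
{
    uint16_t     aiHash[DEMO_HASH_SIZE];
    DEMOHASHPAGE aPages[64];
} DEMOHASHPOOL;

/** Returns the first pool page whose GCPhys falls inside the guest page
 * containing GCPhys, or NULL if the bucket holds no such page. */
static DEMOHASHPAGE *demoHashLookup(DEMOHASHPOOL *pPool, uint64_t GCPhys)
{
    GCPhys &= ~(uint64_t)(DEMO_PAGE_SIZE - 1);          /* round down to the page */
    uint16_t i = pPool->aiHash[DEMO_HASH(GCPhys)];
    while (i != DEMO_HASH_NIL)
    {
        DEMOHASHPAGE *pPage = &pPool->aPages[i];
        if (pPage->GCPhys - GCPhys < DEMO_PAGE_SIZE)    /* same guest page? (unsigned) */
            return pPage;
        i = pPage->iNext;
    }
    return NULL;
}
#endif /* illustrative sketch only */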
5020#endif /* IN_RING3 */
5021
5022#ifdef IN_RING3
5023
5024
5025/**
5026 * Resets a CPU on hot plugging.
5027 *
5028 * @param pVM The VM handle.
5029 * @param pVCpu The virtual CPU.
5030 */
5031void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5032{
5033 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5034
5035 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5036 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5037 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5038}
5039
5040
5041/**
5042 * Flushes the entire cache.
5043 *
5044 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5045 * this and will execute the CR3 flush.
5046 *
5047 * @param pVM The VM handle.
5048 */
5049void pgmR3PoolReset(PVM pVM)
5050{
5051 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5052
5053 Assert(PGMIsLockOwner(pVM));
5054 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5055 LogFlow(("pgmR3PoolReset:\n"));
5056
5057 /*
5058 * If there are no pages in the pool, there is nothing to do.
5059 */
5060 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5061 {
5062 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5063 return;
5064 }
5065
5066 /*
5067 * Exit the shadow mode since we're going to clear everything,
5068 * including the root page.
5069 */
5070 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5071 {
5072 PVMCPU pVCpu = &pVM->aCpus[i];
5073 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5074 }
5075
5076 /*
5077 * Nuke the free list and reinsert all pages into it.
5078 */
5079 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5080 {
5081 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5082
5083 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5084 if (pPage->fMonitored)
5085 pgmPoolMonitorFlush(pPool, pPage);
5086 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5087 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5088 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5089 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5090 pPage->cModifications = 0;
5091 pPage->GCPhys = NIL_RTGCPHYS;
5092 pPage->enmKind = PGMPOOLKIND_FREE;
5093 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5094 Assert(pPage->idx == i);
5095 pPage->iNext = i + 1;
5096 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5097 pPage->fSeenNonGlobal = false;
5098 pPage->fMonitored = false;
5099#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5100 pPage->fDirty = false;
5101#endif
5102 pPage->fCached = false;
5103 pPage->fReusedFlushPending = false;
5104 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5105 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5106 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5107 pPage->cLocked = 0;
5108 }
5109 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5110 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5111 pPool->cUsedPages = 0;
5112
5113 /*
5114 * Zap and reinitialize the user records.
5115 */
5116 pPool->cPresent = 0;
5117 pPool->iUserFreeHead = 0;
5118 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5119 const unsigned cMaxUsers = pPool->cMaxUsers;
5120 for (unsigned i = 0; i < cMaxUsers; i++)
5121 {
5122 paUsers[i].iNext = i + 1;
5123 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5124 paUsers[i].iUserTable = 0xfffffffe;
5125 }
5126 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5127
5128 /*
5129 * Clear all the GCPhys links and rebuild the phys ext free list.
5130 */
5131 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
5132 pRam;
5133 pRam = pRam->CTX_SUFF(pNext))
5134 {
5135 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5136 while (iPage-- > 0)
5137 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
5138 }
5139
5140 pPool->iPhysExtFreeHead = 0;
5141 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5142 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5143 for (unsigned i = 0; i < cMaxPhysExts; i++)
5144 {
5145 paPhysExts[i].iNext = i + 1;
5146 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5147 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5148 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5149 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5150 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5151 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5152 }
5153 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5154
5155 /*
5156 * Just zap the modified list.
5157 */
5158 pPool->cModifiedPages = 0;
5159 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5160
5161 /*
5162 * Clear the GCPhys hash and the age list.
5163 */
5164 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5165 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5166 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5167 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5168
5169#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5170 /* Clear all dirty pages. */
5171 pPool->idxFreeDirtyPage = 0;
5172 pPool->cDirtyPages = 0;
5173 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
5174 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
5175#endif
5176
5177 /*
5178 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5179 */
5180 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5181 {
5182 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5183 pPage->iNext = NIL_PGMPOOL_IDX;
5184 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5185 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5186 pPage->cModifications = 0;
5187 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5188 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5189 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5190 if (pPage->fMonitored)
5191 {
5192 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
5193 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5194 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5195 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5196 pPool->pszAccessHandler);
5197 AssertFatalRCSuccess(rc);
5198 pgmPoolHashInsert(pPool, pPage);
5199 }
5200 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5201 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5202 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5203 }
5204
5205 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5206 {
5207 /*
5208 * Re-enter the shadowing mode and assert Sync CR3 FF.
5209 */
5210 PVMCPU pVCpu = &pVM->aCpus[i];
5211 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5212 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5213 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5214 }
5215
5216 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5217}
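
/*
 * pgmR3PoolReset rebuilds three intrusive free lists (pages, user records,
 * physical-extent records) with the same pattern: chain every array slot to
 * its successor through the embedded index field and terminate the last slot
 * with the NIL value, after which the list head simply points at the first
 * slot.  Compiled-out sketch with a hypothetical record type below.
 */
#if 0 /* illustrative sketch only */
# include <stdint.h>

# define DEMO_REC_NIL UINT16_MAX

typedef struct DEMOREC
{
    uint16_t iNext;     /* free-list link, like iNext in the pool structures */
    /* payload fields are cleared separately by the reset code */
} DEMOREC;

/** Chains paRecs[iFirst..cRecs-1] into a free list and returns the new head
 * index (DEMO_REC_NIL if the range is empty). */
static uint16_t demoRebuildFreeList(DEMOREC *paRecs, uint16_t cRecs, uint16_t iFirst)
{
    if (iFirst >= cRecs)
        return DEMO_REC_NIL;
    for (uint16_t i = iFirst; i + 1 < cRecs; i++)
        paRecs[i].iNext = (uint16_t)(i + 1);
    paRecs[cRecs - 1].iNext = DEMO_REC_NIL;    /* terminate the last entry */
    return iFirst;
}
#endif /* illustrative sketch only */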
5218#endif /* IN_RING3 */
5219
5220#ifdef LOG_ENABLED
5221static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5222{
5223 switch(enmKind)
5224 {
5225 case PGMPOOLKIND_INVALID:
5226 return "PGMPOOLKIND_INVALID";
5227 case PGMPOOLKIND_FREE:
5228 return "PGMPOOLKIND_FREE";
5229 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5230 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5231 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5232 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5233 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5234 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5235 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5236 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5237 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5238 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5239 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5240 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5241 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5242 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5243 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5244 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5245 case PGMPOOLKIND_32BIT_PD:
5246 return "PGMPOOLKIND_32BIT_PD";
5247 case PGMPOOLKIND_32BIT_PD_PHYS:
5248 return "PGMPOOLKIND_32BIT_PD_PHYS";
5249 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5250 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5251 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5252 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5253 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5254 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5255 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5256 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5257 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5258 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5259 case PGMPOOLKIND_PAE_PD_PHYS:
5260 return "PGMPOOLKIND_PAE_PD_PHYS";
5261 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5262 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5263 case PGMPOOLKIND_PAE_PDPT:
5264 return "PGMPOOLKIND_PAE_PDPT";
5265 case PGMPOOLKIND_PAE_PDPT_PHYS:
5266 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5267 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5268 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5269 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5270 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5271 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5272 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5273 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5274 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5275 case PGMPOOLKIND_64BIT_PML4:
5276 return "PGMPOOLKIND_64BIT_PML4";
5277 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5278 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5279 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5280 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5281 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5282 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5283 case PGMPOOLKIND_ROOT_NESTED:
5284 return "PGMPOOLKIND_ROOT_NESTED";
5285 }
5286 return "Unknown kind!";
5287}
5288#endif /* LOG_ENABLED */