VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@27204

Last change on this file since 27204 was 27204, checked in by vboxsync, 15 years ago

Paranoia

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 196.5 KB
1/* $Id: PGMAllPool.cpp 27204 2010-03-09 11:11:22Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "../PGMInternal.h"
35#include <VBox/vm.h>
36#include "../PGMInline.h"
37#include <VBox/disopcode.h>
38#include <VBox/hwacc_vmx.h>
39
40#include <VBox/log.h>
41#include <VBox/err.h>
42#include <iprt/asm.h>
43#include <iprt/string.h>
44
45
46/*******************************************************************************
47* Internal Functions *
48*******************************************************************************/
49RT_C_DECLS_BEGIN
50static void pgmPoolFlushAllInt(PPGMPOOL pPool);
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
55static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
56#ifndef IN_RING3
57DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
58#endif
59#ifdef LOG_ENABLED
60static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
61#endif
62#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
63static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT);
64#endif
65
66int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
67PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
68void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
69void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
70static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
71
72RT_C_DECLS_END
73
74
75/**
76 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
77 *
78 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
79 * @param enmKind The page kind.
80 */
81DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
82{
83 switch (enmKind)
84 {
85 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
86 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
87 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
88 return true;
89 default:
90 return false;
91 }
92}
93
94/** @def PGMPOOL_PAGE_2_LOCKED_PTR
95 * Maps a pool page into the current context and locks it (RC only).
96 *
97 * @returns Pointer to the mapped pool page.
98 * @param pVM The VM handle.
99 * @param pPage The pool page.
100 *
101 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume one of the
102 * small page window entries employed by that function. Be careful.
103 * @remark There is no need to assert on the result.
104 */
105#if defined(IN_RC)
106DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
107{
108 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
109
110 /* Make sure the dynamic mapping will not be reused. */
111 if (pv)
112 PGMDynLockHCPage(pVM, (uint8_t *)pv);
113
114 return pv;
115}
116#else
117# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
118#endif
119
120/** @def PGMPOOL_UNLOCK_PTR
121 * Unlocks a previously locked dynamic page mapping (RC only).
122 *
123 * @returns VBox status code.
124 * @param pVM The VM handle.
125 * @param pPage The pool page.
126 *
127 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume one of the
128 * small page window entries employed by that function. Be careful.
129 * @remark There is no need to assert on the result.
130 */
131#if defined(IN_RC)
132DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
133{
134 if (pvPage)
135 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
136}
137#else
138# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
139#endif
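/*
 * A minimal usage sketch of the two helpers above, mirroring how
 * pgmPoolMonitorChainChanging() uses them further down:
 *
 *     void *pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
 *     // ... inspect or clear shadow entries through pv ...
 *     PGMPOOL_UNLOCK_PTR(pVM, pv);
 *
 * Outside RC both helpers collapse to the plain mapping macro / a no-op.
 */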
140
141
142/**
143 * Flushes a chain of pages sharing the same access monitor.
144 *
145 * @returns VBox status code suitable for scheduling.
146 * @param pPool The pool.
147 * @param pPage A page in the chain.
148 */
149int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
150{
151 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
152
153 /*
154 * Find the list head.
155 */
156 uint16_t idx = pPage->idx;
157 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
158 {
159 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
160 {
161 idx = pPage->iMonitoredPrev;
162 Assert(idx != pPage->idx);
163 pPage = &pPool->aPages[idx];
164 }
165 }
166
167 /*
168 * Iterate the list flushing each shadow page.
169 */
170 int rc = VINF_SUCCESS;
171 for (;;)
172 {
173 idx = pPage->iMonitoredNext;
174 Assert(idx != pPage->idx);
175 if (pPage->idx >= PGMPOOL_IDX_FIRST)
176 {
177 int rc2 = pgmPoolFlushPage(pPool, pPage);
178 AssertRC(rc2);
179 }
180 /* next */
181 if (idx == NIL_PGMPOOL_IDX)
182 break;
183 pPage = &pPool->aPages[idx];
184 }
185 return rc;
186}
187
188
189/**
190 * Wrapper for getting the current context pointer to the entry being modified.
191 *
192 * @returns VBox status code suitable for scheduling.
193 * @param pVM VM Handle.
194 * @param pvDst Destination address
195 * @param pvSrc Source guest virtual address.
196 * @param GCPhysSrc The source guest physical address.
197 * @param cb Size of data to read
198 */
199DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
200{
201#if defined(IN_RING3)
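 /* The source pointer is aligned down to an entry (cb sized) boundary so the
    whole guest PTE/PDE is read even when the faulting write hit the middle of
    it; the callers only pass cb = 4 or 8 here. */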
202 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
203 return VINF_SUCCESS;
204#else
205 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
206 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
207#endif
208}
209
210/**
211 * Process shadow entries before they are changed by the guest.
212 *
213 * For PT entries we will clear them. For PD entries, we'll simply check
214 * for mapping conflicts and set the SyncCR3 FF if found.
215 *
216 * @param pVCpu VMCPU handle
217 * @param pPool The pool.
218 * @param pPage The head page.
219 * @param GCPhysFault The guest physical fault address.
220 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
221 * In R3 this is the host context 'fault' address.
222 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
223 */
224void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
225{
226 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
227 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
228 PVM pVM = pPool->CTX_SUFF(pVM);
229
230 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
231
232 for (;;)
233 {
234 union
235 {
236 void *pv;
237 PX86PT pPT;
238 PX86PTPAE pPTPae;
239 PX86PD pPD;
240 PX86PDPAE pPDPae;
241 PX86PDPT pPDPT;
242 PX86PML4 pPML4;
243 } uShw;
244
245 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
246
247 uShw.pv = NULL;
248 switch (pPage->enmKind)
249 {
250 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
251 {
252 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
253 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
254 const unsigned iShw = off / sizeof(X86PTE);
255 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
256 if (uShw.pPT->a[iShw].n.u1Present)
257 {
258 X86PTE GstPte;
259
260 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
261 AssertRC(rc);
262 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
263 pgmPoolTracDerefGCPhysHint(pPool, pPage,
264 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
265 GstPte.u & X86_PTE_PG_MASK);
266 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
267 }
268 break;
269 }
270
271 /* page/2 sized */
272 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
273 {
274 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
275 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
276 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
277 {
278 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
279 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
280 if (uShw.pPTPae->a[iShw].n.u1Present)
281 {
282 X86PTE GstPte;
283 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
284 AssertRC(rc);
285
286 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
287 pgmPoolTracDerefGCPhysHint(pPool, pPage,
288 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
289 GstPte.u & X86_PTE_PG_MASK);
290 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
291 }
292 }
293 break;
294 }
295
296 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
297 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
298 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
299 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
300 {
301 unsigned iGst = off / sizeof(X86PDE);
302 unsigned iShwPdpt = iGst / 256;
303 unsigned iShw = (iGst % 256) * 2;
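 /* Index math: a 32-bit guest PD has 1024 entries, each shadow PAE PD covers
    256 of them, and every 32-bit PDE expands into two PAE PDEs. Worked example:
    off=0x804 -> iGst=513 -> iShwPdpt=2 (third shadow PD), iShw=2 (entries 2+3). */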
304 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
305
306 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
307 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
308 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
309 {
310 for (unsigned i = 0; i < 2; i++)
311 {
312# ifndef IN_RING0
313 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
314 {
315 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
316 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
317 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
318 break;
319 }
320 else
321# endif /* !IN_RING0 */
322 if (uShw.pPDPae->a[iShw+i].n.u1Present)
323 {
324 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
325 pgmPoolFree(pVM,
326 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
327 pPage->idx,
328 iShw + i);
329 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
330 }
331
332 /* paranoia / a bit assumptive. */
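 /* A misaligned write that is large enough to spill into the next 32-bit guest
    PDE must also invalidate the shadow PDE pair mirroring that next entry,
    hence the index below is bumped past the current pair. */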
333 if ( (off & 3)
334 && (off & 3) + cbWrite > 4)
335 {
336 const unsigned iShw2 = iShw + 2 + i;
337 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
338 {
339# ifndef IN_RING0
340 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
341 {
342 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
343 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
344 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
345 break;
346 }
347 else
348# endif /* !IN_RING0 */
349 if (uShw.pPDPae->a[iShw2].n.u1Present)
350 {
351 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
352 pgmPoolFree(pVM,
353 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
354 pPage->idx,
355 iShw2);
356 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
357 }
358 }
359 }
360 }
361 }
362 break;
363 }
364
365 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
366 {
367 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
368 const unsigned iShw = off / sizeof(X86PTEPAE);
369 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
370 if (uShw.pPTPae->a[iShw].n.u1Present)
371 {
372 X86PTEPAE GstPte;
373 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
374 AssertRC(rc);
375
376 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
377 pgmPoolTracDerefGCPhysHint(pPool, pPage,
378 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
379 GstPte.u & X86_PTE_PAE_PG_MASK);
380 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
381 }
382
383 /* paranoia / a bit assumptive. */
384 if ( (off & 7)
385 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
386 {
387 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
388 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
389
390 if (uShw.pPTPae->a[iShw2].n.u1Present)
391 {
392 X86PTEPAE GstPte;
393# ifdef IN_RING3
394 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
395# else
396 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
397# endif
398 AssertRC(rc);
399 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
400 pgmPoolTracDerefGCPhysHint(pPool, pPage,
401 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
402 GstPte.u & X86_PTE_PAE_PG_MASK);
403 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u ,0);
404 }
405 }
406 break;
407 }
408
409 case PGMPOOLKIND_32BIT_PD:
410 {
411 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
412 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
413
414 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
415 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
416# ifndef IN_RING0
417 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
418 {
419 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
420 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
421 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
422 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
423 break;
424 }
425# endif /* !IN_RING0 */
426# ifndef IN_RING0
427 else
428# endif /* !IN_RING0 */
429 {
430 if (uShw.pPD->a[iShw].n.u1Present)
431 {
432 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
433 pgmPoolFree(pVM,
434 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
435 pPage->idx,
436 iShw);
437 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
438 }
439 }
440 /* paranoia / a bit assumptive. */
441 if ( (off & 3)
442 && (off & 3) + cbWrite > sizeof(X86PTE))
443 {
444 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
445 if ( iShw2 != iShw
446 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
447 {
448# ifndef IN_RING0
449 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
450 {
451 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
452 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
453 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
454 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
455 break;
456 }
457# endif /* !IN_RING0 */
458# ifndef IN_RING0
459 else
460# endif /* !IN_RING0 */
461 {
462 if (uShw.pPD->a[iShw2].n.u1Present)
463 {
464 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
465 pgmPoolFree(pVM,
466 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
467 pPage->idx,
468 iShw2);
469 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
470 }
471 }
472 }
473 }
474#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
475 if ( uShw.pPD->a[iShw].n.u1Present
476 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
477 {
478 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
479# ifdef IN_RC /* TLB load - we're pushing things a bit... */
480 ASMProbeReadByte(pvAddress);
481# endif
482 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
483 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
484 }
485#endif
486 break;
487 }
488
489 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
490 {
491 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
492 const unsigned iShw = off / sizeof(X86PDEPAE);
493 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
494#ifndef IN_RING0
495 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
496 {
497 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
498 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
499 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
500 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
501 break;
502 }
503#endif /* !IN_RING0 */
504 /*
505 * Causes trouble when the guest uses a PDE to refer to the whole page table level
506 * structure. (Invalidate here; faults later on when it tries to change the page
507 * table entries -> recheck; probably only applies to the RC case.)
508 */
509# ifndef IN_RING0
510 else
511# endif /* !IN_RING0 */
512 {
513 if (uShw.pPDPae->a[iShw].n.u1Present)
514 {
515 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
516 pgmPoolFree(pVM,
517 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
518 pPage->idx,
519 iShw);
520 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
521 }
522 }
523 /* paranoia / a bit assumptive. */
524 if ( (off & 7)
525 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
526 {
527 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
528 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
529
530#ifndef IN_RING0
531 if ( iShw2 != iShw
532 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
533 {
534 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
535 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
536 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
537 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
538 break;
539 }
540#endif /* !IN_RING0 */
541# ifndef IN_RING0
542 else
543# endif /* !IN_RING0 */
544 if (uShw.pPDPae->a[iShw2].n.u1Present)
545 {
546 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
547 pgmPoolFree(pVM,
548 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
549 pPage->idx,
550 iShw2);
551 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
552 }
553 }
554 break;
555 }
556
557 case PGMPOOLKIND_PAE_PDPT:
558 {
559 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
560 /*
561 * Hopefully this doesn't happen very often:
562 * - touching unused parts of the page
563 * - messing with the bits of pd pointers without changing the physical address
564 */
565 /* PDPT roots are not page aligned; 32 byte only! */
566 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
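 /* The offset is computed against pPage->GCPhys rather than the page offset
    because a PAE PDPT is only 32 bytes long and need not start on a page
    boundary; iShw below is then the index of the touched PDPT entry. */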
567
568 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
569 const unsigned iShw = offPdpt / sizeof(X86PDPE);
570 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
571 {
572# ifndef IN_RING0
573 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
574 {
575 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
576 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
577 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
578 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
579 break;
580 }
581# endif /* !IN_RING0 */
582# ifndef IN_RING0
583 else
584# endif /* !IN_RING0 */
585 if (uShw.pPDPT->a[iShw].n.u1Present)
586 {
587 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
588 pgmPoolFree(pVM,
589 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
590 pPage->idx,
591 iShw);
592 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
593 }
594
595 /* paranoia / a bit assumptive. */
596 if ( (offPdpt & 7)
597 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
598 {
599 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
600 if ( iShw2 != iShw
601 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
602 {
603# ifndef IN_RING0
604 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
605 {
606 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
607 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
608 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
609 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
610 break;
611 }
612# endif /* !IN_RING0 */
613# ifndef IN_RING0
614 else
615# endif /* !IN_RING0 */
616 if (uShw.pPDPT->a[iShw2].n.u1Present)
617 {
618 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
619 pgmPoolFree(pVM,
620 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
621 pPage->idx,
622 iShw2);
623 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
624 }
625 }
626 }
627 }
628 break;
629 }
630
631#ifndef IN_RC
632 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
633 {
634 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
635 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
636 const unsigned iShw = off / sizeof(X86PDEPAE);
637 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
638 if (uShw.pPDPae->a[iShw].n.u1Present)
639 {
640 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
641 pgmPoolFree(pVM,
642 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
643 pPage->idx,
644 iShw);
645 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
646 }
647 /* paranoia / a bit assumptive. */
648 if ( (off & 7)
649 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
650 {
651 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
652 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
653
654 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
655 if (uShw.pPDPae->a[iShw2].n.u1Present)
656 {
657 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
658 pgmPoolFree(pVM,
659 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
660 pPage->idx,
661 iShw2);
662 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
663 }
664 }
665 break;
666 }
667
668 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
669 {
670 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
671 /*
672 * Hopefully this doesn't happen very often:
673 * - messing with the bits of pd pointers without changing the physical address
674 */
675 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
676 const unsigned iShw = off / sizeof(X86PDPE);
677 if (uShw.pPDPT->a[iShw].n.u1Present)
678 {
679 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
680 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
681 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
682 }
683 /* paranoia / a bit assumptive. */
684 if ( (off & 7)
685 && (off & 7) + cbWrite > sizeof(X86PDPE))
686 {
687 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
688 if (uShw.pPDPT->a[iShw2].n.u1Present)
689 {
690 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
691 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
692 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
693 }
694 }
695 break;
696 }
697
698 case PGMPOOLKIND_64BIT_PML4:
699 {
700 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
701 /*
702 * Hopefully this doesn't happen very often:
703 * - messing with the bits of pd pointers without changing the physical address
704 */
705 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
706 const unsigned iShw = off / sizeof(X86PDPE);
707 if (uShw.pPML4->a[iShw].n.u1Present)
708 {
709 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
710 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
711 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
712 }
713 /* paranoia / a bit assumptive. */
714 if ( (off & 7)
715 && (off & 7) + cbWrite > sizeof(X86PDPE))
716 {
717 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
718 if (uShw.pPML4->a[iShw2].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
721 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
722 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
723 }
724 }
725 break;
726 }
727#endif /* !IN_RC */
728
729 default:
730 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
731 }
732 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
733
734 /* next */
735 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
736 return;
737 pPage = &pPool->aPages[pPage->iMonitoredNext];
738 }
739}
740
741# ifndef IN_RING3
742/**
743 * Checks if an access could be a fork operation in progress.
744 *
745 * Meaning that the guest is setting up the parent process for Copy-On-Write.
746 *
747 * @returns true if it's likely that we're forking, otherwise false.
748 * @param pPool The pool.
749 * @param pDis The disassembled instruction.
750 * @param offFault The access offset.
751 */
752DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
753{
754 /*
755 * i386 linux is using btr to clear X86_PTE_RW.
756 * The functions involved are (2.6.16 source inspection):
757 * clear_bit
758 * ptep_set_wrprotect
759 * copy_one_pte
760 * copy_pte_range
761 * copy_pmd_range
762 * copy_pud_range
763 * copy_page_range
764 * dup_mmap
765 * dup_mm
766 * copy_mm
767 * copy_process
768 * do_fork
769 */
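 /* The check below presumably filters out writes to the high dword of a PAE
    PTE (offFault & 4), where the R/W bit cannot live; the bit index operand of
    the BTR itself is not validated (see the @todo). */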
770 if ( pDis->pCurInstr->opcode == OP_BTR
771 && !(offFault & 4)
772 /** @todo Validate that the bit index is X86_PTE_RW. */
773 )
774 {
775 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
776 return true;
777 }
778 return false;
779}
780
781
782/**
783 * Determine whether the page is likely to have been reused.
784 *
785 * @returns true if we consider the page as being reused for a different purpose.
786 * @returns false if we consider it to still be a paging page.
787 * @param pVM VM Handle.
788 * @param pVCpu VMCPU Handle.
789 * @param pRegFrame Trap register frame.
790 * @param pDis The disassembly info for the faulting instruction.
791 * @param pvFault The fault address.
792 *
793 * @remark The REP prefix check is left to the caller because of STOSD/W.
794 */
795DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
796{
797#ifndef IN_RC
798 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
799 if ( HWACCMHasPendingIrq(pVM)
800 && (pRegFrame->rsp - pvFault) < 32)
801 {
802 /* Fault caused by stack writes while trying to inject an interrupt event. */
803 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
804 return true;
805 }
806#else
807 NOREF(pVM); NOREF(pvFault);
808#endif
809
810 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
811
812 /* Non-supervisor mode write means it's used for something else. */
813 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
814 return true;
815
816 switch (pDis->pCurInstr->opcode)
817 {
818 /* call implies the actual push of the return address faulted */
819 case OP_CALL:
820 Log4(("pgmPoolMonitorIsReused: CALL\n"));
821 return true;
822 case OP_PUSH:
823 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
824 return true;
825 case OP_PUSHF:
826 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
827 return true;
828 case OP_PUSHA:
829 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
830 return true;
831 case OP_FXSAVE:
832 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
833 return true;
834 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
835 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
836 return true;
837 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
838 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
839 return true;
840 case OP_MOVSWD:
841 case OP_STOSWD:
842 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
843 && pRegFrame->rcx >= 0x40
844 )
845 {
846 Assert(pDis->mode == CPUMODE_64BIT);
847
848 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
849 return true;
850 }
851 return false;
852 }
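 /* A write whose address operand is based on xSP is taken to mean the page is
    now being used as stack memory rather than as a page table. */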
853 if ( ( (pDis->param1.flags & USE_REG_GEN32)
854 || (pDis->param1.flags & USE_REG_GEN64))
855 && (pDis->param1.base.reg_gen == USE_REG_ESP))
856 {
857 Log4(("pgmPoolMonitorIsReused: ESP\n"));
858 return true;
859 }
860
861 return false;
862}
863
864/**
865 * Flushes the page being accessed.
866 *
867 * @returns VBox status code suitable for scheduling.
868 * @param pVM The VM handle.
869 * @param pVCpu The VMCPU handle.
870 * @param pPool The pool.
871 * @param pPage The pool page (head).
872 * @param pDis The disassembly of the write instruction.
873 * @param pRegFrame The trap register frame.
874 * @param GCPhysFault The fault address as guest physical address.
875 * @param pvFault The fault address.
876 */
877static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
878 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
879{
880 /*
881 * First, do the flushing.
882 */
883 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
884
885 /*
886 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection). Must do this in raw mode (!); XP boot will fail otherwise
887 */
888 uint32_t cbWritten;
889 int rc2 = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten, EMCODETYPE_ALL);
890 if (RT_SUCCESS(rc2))
891 pRegFrame->rip += pDis->opsize;
892 else if (rc2 == VERR_EM_INTERPRETER)
893 {
894#ifdef IN_RC
895 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
896 {
897 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
898 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
899 rc = VINF_SUCCESS;
900 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
901 }
902 else
903#endif
904 {
905 rc = VINF_EM_RAW_EMULATE_INSTR;
906 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
907 }
908 }
909 else
910 rc = rc2;
911
912 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
913 return rc;
914}
915
916/**
917 * Handles the STOSD write accesses.
918 *
919 * @returns VBox status code suitable for scheduling.
920 * @param pVM The VM handle.
921 * @param pPool The pool.
922 * @param pPage The pool page (head).
923 * @param pDis The disassembly of the write instruction.
924 * @param pRegFrame The trap register frame.
925 * @param GCPhysFault The fault address as guest physical address.
926 * @param pvFault The fault address.
927 */
928DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
929 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
930{
931 unsigned uIncrement = pDis->param1.size;
932
933 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
934 Assert(pRegFrame->rcx <= 0x20);
935
936#ifdef VBOX_STRICT
937 if (pDis->opmode == CPUMODE_32BIT)
938 Assert(uIncrement == 4);
939 else
940 Assert(uIncrement == 8);
941#endif
942
943 Log3(("pgmPoolAccessHandlerSTOSD\n"));
944
945 /*
946 * Increment the modification counter and insert it into the list
947 * of modified pages the first time.
948 */
949 if (!pPage->cModifications++)
950 pgmPoolMonitorModifiedInsert(pPool, pPage);
951
952 /*
953 * Execute REP STOSD.
954 *
955 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
956 * write situation, meaning that it's safe to write here.
957 */
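 /* Each iteration first notifies the pool of the entry about to change
    (pgmPoolMonitorChainChanging) and then performs the guest's store of
    eax/rax itself, advancing the destination, rdi and rcx just like the
    hardware would, so the instruction can simply be skipped afterwards. */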
958 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
959 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
960 while (pRegFrame->rcx)
961 {
962#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
963 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
964 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
965 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
966#else
967 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
968#endif
969#ifdef IN_RC
970 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
971#else
972 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
973#endif
974 pu32 += uIncrement;
975 GCPhysFault += uIncrement;
976 pRegFrame->rdi += uIncrement;
977 pRegFrame->rcx--;
978 }
979 pRegFrame->rip += pDis->opsize;
980
981 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
982 return VINF_SUCCESS;
983}
984
985
986/**
987 * Handles the simple write accesses.
988 *
989 * @returns VBox status code suitable for scheduling.
990 * @param pVM The VM handle.
991 * @param pVCpu The VMCPU handle.
992 * @param pPool The pool.
993 * @param pPage The pool page (head).
994 * @param pDis The disassembly of the write instruction.
995 * @param pRegFrame The trap register frame.
996 * @param GCPhysFault The fault address as guest physical address.
997 * @param pvFault The fault address.
998 * @param pfReused Reused state (out)
999 */
1000DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1001 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1002{
1003 Log3(("pgmPoolAccessHandlerSimple\n"));
1004 /*
1005 * Increment the modification counter and insert it into the list
1006 * of modified pages the first time.
1007 */
1008 if (!pPage->cModifications++)
1009 pgmPoolMonitorModifiedInsert(pPool, pPage);
1010
1011 /*
1012 * Clear all the pages. ASSUMES that pvFault is readable.
1013 */
1014#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1015 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1016 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1017 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1018#else
1019 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1020#endif
1021
1022 /*
1023 * Interpret the instruction.
1024 */
1025 uint32_t cb;
1026 int rc = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb, EMCODETYPE_ALL);
1027 if (RT_SUCCESS(rc))
1028 pRegFrame->rip += pDis->opsize;
1029 else if (rc == VERR_EM_INTERPRETER)
1030 {
1031 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1032 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1033 rc = VINF_EM_RAW_EMULATE_INSTR;
1034 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1035 }
1036
1037#if 0 /* experimental code */
1038 if (rc == VINF_SUCCESS)
1039 {
1040 switch (pPage->enmKind)
1041 {
1042 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1043 {
1044 X86PTEPAE GstPte;
1045 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1046 AssertRC(rc);
1047
1048 /* Check the new value written by the guest. If present and with a bogus physical address, then
1049 * it's fairly safe to assume the guest is reusing the PT.
1050 */
1051 if (GstPte.n.u1Present)
1052 {
1053 RTHCPHYS HCPhys = -1;
1054 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1055 if (rc != VINF_SUCCESS)
1056 {
1057 *pfReused = true;
1058 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1059 }
1060 }
1061 break;
1062 }
1063 }
1064 }
1065#endif
1066
1067 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1068 return rc;
1069}
1070
1071/**
1072 * \#PF Handler callback for PT write accesses.
1073 *
1074 * @returns VBox status code (appropriate for GC return).
1075 * @param pVM VM Handle.
1076 * @param uErrorCode CPU Error code.
1077 * @param pRegFrame Trap register frame.
1078 * NULL on DMA and other non CPU access.
1079 * @param pvFault The fault address (cr2).
1080 * @param GCPhysFault The GC physical address corresponding to pvFault.
1081 * @param pvUser User argument.
1082 */
1083DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1084{
1085 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1086 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1087 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1088 PVMCPU pVCpu = VMMGetCpu(pVM);
1089 unsigned cMaxModifications;
1090 bool fForcedFlush = false;
1091
1092 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1093
1094 pgmLock(pVM);
1095 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1096 {
1097 /* Pool page changed while we were waiting for the lock; ignore. */
1098 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1099 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1100 pgmUnlock(pVM);
1101 return VINF_SUCCESS;
1102 }
1103#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1104 if (pPage->fDirty)
1105 {
1106 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1107 pgmUnlock(pVM);
1108 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1109 }
1110#endif
1111
1112#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1113 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1114 {
1115 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1116 void *pvGst;
1117 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1118 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1119 }
1120#endif
1121
1122 /*
1123 * Disassemble the faulting instruction.
1124 */
1125 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1126 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1127 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1128 {
1129 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1130 pgmUnlock(pVM);
1131 return rc;
1132 }
1133
1134 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1135
1136 /*
1137 * We should ALWAYS have the list head as user parameter. This
1138 * is because we use that page to record the changes.
1139 */
1140 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1141
1142#ifdef IN_RING0
1143 /* Maximum number of modifications depends on the page type. */
1144 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1145 cMaxModifications = 4;
1146 else
1147 cMaxModifications = 24;
1148#else
1149 cMaxModifications = 48;
1150#endif
1151
1152 /*
1153 * Incremental page table updates should weigh more than random ones.
1154 * (Only applies when started from offset 0.)
1155 */
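 /* Heuristic: a fault on the entry right after the previously faulting one,
    caused by code within +/-0x40 bytes of the previous RIP and with no other
    pool access in between, is treated as part of a sequential rewrite of the
    whole table, so the modification count is doubled to force an early flush. */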
1156 pVCpu->pgm.s.cPoolAccessHandler++;
1157 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1158 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1159 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1160 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1161 {
1162 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1163 pPage->cModifications = pPage->cModifications * 2;
1164 pPage->pvLastAccessHandlerFault = pvFault;
1165 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1166 if (pPage->cModifications >= cMaxModifications)
1167 {
1168 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1169 fForcedFlush = true;
1170 }
1171 }
1172
1173 if (pPage->cModifications >= cMaxModifications)
1174 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1175
1176 /*
1177 * Check if it's worth dealing with.
1178 */
1179 bool fReused = false;
1180 bool fNotReusedNotForking = false;
1181 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1182 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1183 )
1184 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1185 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1186 {
1187 /*
1188 * Simple instructions, no REP prefix.
1189 */
1190 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1191 {
1192 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1193 if (fReused)
1194 goto flushPage;
1195
1196 /* A mov instruction to change the first page table entry will be remembered so we can detect
1197 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1198 */
1199 if ( rc == VINF_SUCCESS
1200 && pDis->pCurInstr->opcode == OP_MOV
1201 && (pvFault & PAGE_OFFSET_MASK) == 0)
1202 {
1203 pPage->pvLastAccessHandlerFault = pvFault;
1204 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1205 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1206 /* Make sure we don't kick out a page too quickly. */
1207 if (pPage->cModifications > 8)
1208 pPage->cModifications = 2;
1209 }
1210 else
1211 if (pPage->pvLastAccessHandlerFault == pvFault)
1212 {
1213 /* ignore the 2nd write to this page table entry. */
1214 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1215 }
1216 else
1217 {
1218 pPage->pvLastAccessHandlerFault = 0;
1219 pPage->pvLastAccessHandlerRip = 0;
1220 }
1221
1222 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1223 pgmUnlock(pVM);
1224 return rc;
1225 }
1226
1227 /*
1228 * Windows is frequently doing small memset() operations (netio test 4k+).
1229 * We have to deal with these or we'll kill the cache and performance.
1230 */
1231 if ( pDis->pCurInstr->opcode == OP_STOSWD
1232 && !pRegFrame->eflags.Bits.u1DF
1233 && pDis->opmode == pDis->mode
1234 && pDis->addrmode == pDis->mode)
1235 {
1236 bool fValidStosd = false;
1237
1238 if ( pDis->mode == CPUMODE_32BIT
1239 && pDis->prefix == PREFIX_REP
1240 && pRegFrame->ecx <= 0x20
1241 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1242 && !((uintptr_t)pvFault & 3)
1243 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1244 )
1245 {
1246 fValidStosd = true;
1247 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1248 }
1249 else
1250 if ( pDis->mode == CPUMODE_64BIT
1251 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1252 && pRegFrame->rcx <= 0x20
1253 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1254 && !((uintptr_t)pvFault & 7)
1255 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1256 )
1257 {
1258 fValidStosd = true;
1259 }
1260
1261 if (fValidStosd)
1262 {
1263 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1264 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1265 pgmUnlock(pVM);
1266 return rc;
1267 }
1268 }
1269
1270 /* REP prefix, don't bother. */
1271 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1272 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1273 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1274 fNotReusedNotForking = true;
1275 }
1276
1277#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1278 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1279 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1280 */
1281 if ( pPage->cModifications >= cMaxModifications
1282 && !fForcedFlush
1283 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1284 && ( fNotReusedNotForking
1285 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1286 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1287 )
1288 )
1289 {
1290 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1291 Assert(pPage->fDirty == false);
1292
1293 /* Flush any monitored duplicates as we will disable write protection. */
1294 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1295 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1296 {
1297 PPGMPOOLPAGE pPageHead = pPage;
1298
1299 /* Find the monitor head. */
1300 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1301 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1302
1303 while (pPageHead)
1304 {
1305 unsigned idxNext = pPageHead->iMonitoredNext;
1306
1307 if (pPageHead != pPage)
1308 {
1309 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1310 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1311 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1312 AssertRC(rc2);
1313 }
1314
1315 if (idxNext == NIL_PGMPOOL_IDX)
1316 break;
1317
1318 pPageHead = &pPool->aPages[idxNext];
1319 }
1320 }
1321
1322 /* The flushing above might fail for locked pages, so double check. */
1323 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1324 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1325 {
1326 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1327
1328 /* Temporarily allow write access to the page table again. */
1329 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1330 if (rc == VINF_SUCCESS)
1331 {
1332 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1333 AssertMsg(rc == VINF_SUCCESS
1334 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1335 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1336 || rc == VERR_PAGE_NOT_PRESENT,
1337 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1338
1339 pPage->pvDirtyFault = pvFault;
1340
1341 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1342 pgmUnlock(pVM);
1343 return rc;
1344 }
1345 }
1346 }
1347#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1348
1349 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1350flushPage:
1351 /*
1352 * Not worth it, so flush it.
1353 *
1354 * If we considered it to be reused, don't go back to ring-3
1355 * to emulate failed instructions since we usually cannot
1356 * interpret them. This may be a bit risky, in which case
1357 * the reuse detection must be fixed.
1358 */
1359 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1360 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1361 && fReused)
1362 {
1363 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1364 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1365 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1366 }
1367 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1368 pgmUnlock(pVM);
1369 return rc;
1370}
1371
1372# endif /* !IN_RING3 */
1373
1374# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1375
1376# ifdef VBOX_STRICT
1377/**
1378 * Check references to guest physical memory in a PAE / PAE page table.
1379 *
1380 * @param pPool The pool.
1381 * @param pPage The page.
1382 * @param pShwPT The shadow page table (mapping of the page).
1383 * @param pGstPT The guest page table.
1384 */
1385static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1386{
1387 unsigned cErrors = 0;
1388 int LastRc = -1; /* initialized to shut up gcc */
1389 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1390 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1391
1392#ifdef VBOX_STRICT
1393 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1394 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1395#endif
1396 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1397 {
1398 if (pShwPT->a[i].n.u1Present)
1399 {
1400 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1401 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1402 if ( rc != VINF_SUCCESS
1403 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1404 {
1405 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1406 LastPTE = i;
1407 LastRc = rc;
1408 LastHCPhys = HCPhys;
1409 cErrors++;
1410
1411 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1412 rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1413 AssertRC(rc);
1414
1415 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1416 {
1417 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1418
1419 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1420 {
1421 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1422
1423 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1424 {
1425 if ( pShwPT2->a[j].n.u1Present
1426 && pShwPT2->a[j].n.u1Write
1427 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1428 {
1429 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1430 }
1431 }
1432 }
1433 }
1434 }
1435 }
1436 }
1437 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, pShwPT->a[LastPTE].u, LastHCPhys));
1438}
1439# endif /* VBOX_STRICT */
1440
1441/**
1442 * Clear references to guest physical memory in a PAE / PAE page table.
1443 *
1444 * @returns Number of changed PTEs.
1445 * @param pPool The pool.
1446 * @param pPage The page.
1447 * @param pShwPT The shadow page table (mapping of the page).
1448 * @param pGstPT The guest page table.
1449 * @param pOldGstPT The old cached guest page table.
1450 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1451 * @param pfFlush Flush reused page table (out)
1452 */
1453DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1454{
1455 unsigned cChanged = 0;
1456
1457#ifdef VBOX_STRICT
1458 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1459 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1460#endif
1461 *pfFlush = false;
1462
1463 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1464 {
1465 /* Check the new value written by the guest. If present and with a bogus physical address, then
1466 * it's fairly safe to assume the guest is reusing the PT.
1467 */
1468 if ( fAllowRemoval
1469 && pGstPT->a[i].n.u1Present)
1470 {
1471 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1472 {
1473 *pfFlush = true;
1474 return ++cChanged;
1475 }
1476 }
1477 if (pShwPT->a[i].n.u1Present)
1478 {
1479 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1480 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1481 {
1482#ifdef VBOX_STRICT
1483 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1484 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1485 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1486#endif
1487 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1488 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1489 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1490 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1491
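 /* The guest PTE still points at the same physical page as the cached copy, so
    the entry only counts as unchanged if the P/US/A/D/G/NX attributes of the
    shadow and guest entries agree as well and the shadow copy is at most as
    writable as the guest entry (it may be write-protected for monitoring). */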
1492 if ( uHostAttr == uGuestAttr
1493 && fHostRW <= fGuestRW)
1494 continue;
1495 }
1496 cChanged++;
1497 /* Something was changed, so flush it. */
1498 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1499 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1500 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
1501 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1502 }
1503 }
1504 return cChanged;
1505}
1506
1507
1508/**
1509 * Flush a dirty page
1510 *
1511 * @param pVM VM Handle.
1512 * @param pPool The pool.
1513 * @param idxSlot Dirty array slot index
1514 * @param fAllowRemoval Allow a reused page table to be removed
1515 */
1516static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1517{
1518 PPGMPOOLPAGE pPage;
1519 unsigned idxPage;
1520
1521 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1522 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1523 return;
1524
1525 idxPage = pPool->aIdxDirtyPages[idxSlot];
1526 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1527 pPage = &pPool->aPages[idxPage];
1528 Assert(pPage->idx == idxPage);
1529 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1530
1531 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1532 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1533
1534 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1535 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1536 Assert(rc == VINF_SUCCESS);
1537 pPage->fDirty = false;
1538
1539#ifdef VBOX_STRICT
1540 uint64_t fFlags = 0;
1541 RTHCPHYS HCPhys;
1542 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1543 AssertMsg( ( rc == VINF_SUCCESS
1544 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1545 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1546 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1547 || rc == VERR_PAGE_NOT_PRESENT,
1548 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1549#endif
1550
1551 /* Flush those PTEs that have changed. */
1552 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1553 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1554 void *pvGst;
1555 bool fFlush;
1556 rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1557 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0], fAllowRemoval, &fFlush);
1558 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1559 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1560
1561 /* This page is likely to be modified again, so reduce the number of modifications just a bit here. */
1562 Assert(pPage->cModifications);
1563 if (cChanges < 4)
1564 pPage->cModifications = 1; /* must use > 0 here */
1565 else
1566 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1567
1568 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1569 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1570 pPool->idxFreeDirtyPage = idxSlot;
1571
1572 pPool->cDirtyPages--;
1573 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1574 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1575 if (fFlush)
1576 {
1577 Assert(fAllowRemoval);
1578 Log(("Flush reused page table!\n"));
1579 pgmPoolFlushPage(pPool, pPage);
1580 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1581 }
1582 else
1583 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1584}
1585
1586# ifndef IN_RING3
1587/**
1588 * Add a new dirty page
1589 *
1590 * @param pVM VM Handle.
1591 * @param pPool The pool.
1592 * @param pPage The page.
1593 */
1594void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1595{
1596 unsigned idxFree;
1597
1598 Assert(PGMIsLocked(pVM));
1599 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1600 Assert(!pPage->fDirty);
1601
1602 idxFree = pPool->idxFreeDirtyPage;
1603 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1604 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1605
1606 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1607 {
1608 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1609 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1610 }
1611 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1612 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1613
1614 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1615
1616 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1617 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1618 */
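    /* Note: this saved copy also serves as the 'old' guest page table which
     * pgmPoolFlushDirtyPage later hands to pgmPoolTrackFlushPTPaePae (via
     * pPool->aDirtyPages[idxSlot]) when diffing the shadow PT for changed entries. */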
1619 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1620 void *pvGst;
1621 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1622 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1623#ifdef VBOX_STRICT
1624 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1625#endif
1626
1627 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1628 pPage->fDirty = true;
1629 pPage->idxDirty = idxFree;
1630 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1631 pPool->cDirtyPages++;
1632
1633 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
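    /* The AND above is a cheap modulo; it is only correct because the dirty array size
     * is a power of two (8 or 16, see the AssertCompile at the top of this function).
     * E.g. with 16 slots, (15 + 1) & 15 wraps back to slot 0. */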
1634 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1635 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1636 {
1637 unsigned i;
1638 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1639 {
1640 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1641 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1642 {
1643 pPool->idxFreeDirtyPage = idxFree;
1644 break;
1645 }
1646 }
1647 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1648 }
1649
1650 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1651 return;
1652}
1653# endif /* !IN_RING3 */
1654
1655/**
1656 * Check if the specified page is dirty (not write monitored)
1657 *
1658 * @returns true if the page is dirty, false if not.
1659 * @param pVM VM Handle.
1660 * @param GCPhys Guest physical address
1661 */
1662bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1663{
1664 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1665 Assert(PGMIsLocked(pVM));
1666 if (!pPool->cDirtyPages)
1667 return false;
1668
1669 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1670
1671 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1672 {
1673 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1674 {
1675 PPGMPOOLPAGE pPage;
1676 unsigned idxPage = pPool->aIdxDirtyPages[i];
1677
1678 pPage = &pPool->aPages[idxPage];
1679 if (pPage->GCPhys == GCPhys)
1680 return true;
1681 }
1682 }
1683 return false;
1684}
1685
1686/**
1687 * Reset all dirty pages by reinstating page monitoring.
1688 *
1689 * @param pVM VM Handle.
1690 */
1691void pgmPoolResetDirtyPages(PVM pVM)
1692{
1693 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1694 Assert(PGMIsLocked(pVM));
1695 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1696
1697 if (!pPool->cDirtyPages)
1698 return;
1699
1700 Log(("pgmPoolResetDirtyPages\n"));
1701 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1702 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1703
1704 pPool->idxFreeDirtyPage = 0;
1705 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1706 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1707 {
1708 unsigned i;
1709 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1710 {
1711 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1712 {
1713 pPool->idxFreeDirtyPage = i;
1714 break;
1715 }
1716 }
1717 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1718 }
1719
1720 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1721 return;
1722}
1723
1724/**
1725 * Invalidates the dirty state of the specified guest page table by flushing it and reinstating page monitoring.
1726 *
1727 * @param pVM VM Handle.
1728 * @param GCPhysPT Physical address of the page table.
1729 */
1730void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1731{
1732 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1733 Assert(PGMIsLocked(pVM));
1734 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1735 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aIdxDirtyPages);
1736
1737 if (!pPool->cDirtyPages)
1738 return;
1739
1740 GCPhysPT = GCPhysPT & ~(RTGCPHYS)(PAGE_SIZE - 1);
1741
1742 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1743 {
1744 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1745 {
1746 unsigned idxPage = pPool->aIdxDirtyPages[i];
1747
1748 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1749 if (pPage->GCPhys == GCPhysPT)
1750 {
1751 idxDirtyPage = i;
1752 break;
1753 }
1754 }
1755 }
1756
1757 if (idxDirtyPage != RT_ELEMENTS(pPool->aIdxDirtyPages))
1758 {
1759 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1760 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1761 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1762 {
1763 unsigned i;
1764 for (i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1765 {
1766 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1767 {
1768 pPool->idxFreeDirtyPage = i;
1769 break;
1770 }
1771 }
1772 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1773 }
1774 }
1775}
1776
1777# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1778
1779/**
1780 * Inserts a page into the GCPhys hash table.
1781 *
1782 * @param pPool The pool.
1783 * @param pPage The page.
1784 */
1785DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1786{
1787 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1788 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1789 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
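    /* Link the page in at the head of the hash bucket's singly linked chain; lookups
     * such as pgmPoolCacheAlloc and pgmPoolMonitorGetPageByGCPhys walk the chain via
     * iNext until NIL_PGMPOOL_IDX. */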
1790 pPage->iNext = pPool->aiHash[iHash];
1791 pPool->aiHash[iHash] = pPage->idx;
1792}
1793
1794
1795/**
1796 * Removes a page from the GCPhys hash table.
1797 *
1798 * @param pPool The pool.
1799 * @param pPage The page.
1800 */
1801DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1802{
1803 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1804 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1805 if (pPool->aiHash[iHash] == pPage->idx)
1806 pPool->aiHash[iHash] = pPage->iNext;
1807 else
1808 {
1809 uint16_t iPrev = pPool->aiHash[iHash];
1810 for (;;)
1811 {
1812 const int16_t i = pPool->aPages[iPrev].iNext;
1813 if (i == pPage->idx)
1814 {
1815 pPool->aPages[iPrev].iNext = pPage->iNext;
1816 break;
1817 }
1818 if (i == NIL_PGMPOOL_IDX)
1819 {
1820 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1821 break;
1822 }
1823 iPrev = i;
1824 }
1825 }
1826 pPage->iNext = NIL_PGMPOOL_IDX;
1827}
1828
1829
1830/**
1831 * Frees up one cache page.
1832 *
1833 * @returns VBox status code.
1834 * @retval VINF_SUCCESS on success.
1835 * @param pPool The pool.
1836 * @param iUser The user index.
1837 */
1838static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1839{
1840#ifndef IN_RC
1841 const PVM pVM = pPool->CTX_SUFF(pVM);
1842#endif
1843 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1844 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1845
1846 /*
1847 * Select one page from the tail of the age list.
1848 */
1849 PPGMPOOLPAGE pPage;
1850 for (unsigned iLoop = 0; ; iLoop++)
1851 {
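        /* The age list is kept in (approximate) LRU order: iAgeHead is the most recently
         * used page (see pgmPoolCacheInsert/pgmPoolCacheUsed), so evicting from iAgeTail
         * frees the least recently used cacheable page. */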
1852 uint16_t iToFree = pPool->iAgeTail;
1853 if (iToFree == iUser)
1854 iToFree = pPool->aPages[iToFree].iAgePrev;
1855/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1856 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1857 {
1858 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1859 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1860 {
1861 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1862 continue;
1863 iToFree = i;
1864 break;
1865 }
1866 }
1867*/
1868 Assert(iToFree != iUser);
1869 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1870 pPage = &pPool->aPages[iToFree];
1871
1872 /*
1873 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1874 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1875 */
1876 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1877 break;
1878 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1879 pgmPoolCacheUsed(pPool, pPage);
1880 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1881 }
1882
1883 /*
1884 * Found a usable page, flush it and return.
1885 */
1886 int rc = pgmPoolFlushPage(pPool, pPage);
1887 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1888 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
1889 if (rc == VINF_SUCCESS)
1890 PGM_INVL_ALL_VCPU_TLBS(pVM);
1891 return rc;
1892}
1893
1894
1895/**
1896 * Checks if a kind mismatch is really a page being reused
1897 * or if it's just a normal remapping.
1898 *
1899 * @returns true if reused and the cached page (enmKind1) should be flushed
1900 * @returns false if not reused.
1901 * @param enmKind1 The kind of the cached page.
1902 * @param enmKind2 The kind of the requested page.
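 *
 * @remarks Illustrative example: a page cached as PGMPOOLKIND_32BIT_PD and requested
 *          again as PGMPOOLKIND_PAE_PT_FOR_PAE_PT is treated as reuse (the cached page
 *          gets flushed), whereas requesting it as PGMPOOLKIND_PAE_PT_FOR_32BIT_PT is
 *          just the normal 32-bit-guest-under-PAE-shadow remapping and the cached page
 *          is kept.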
1903 */
1904static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1905{
1906 switch (enmKind1)
1907 {
1908 /*
1909 * Never reuse them. There is no remapping in non-paging mode.
1910 */
1911 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1912 case PGMPOOLKIND_32BIT_PD_PHYS:
1913 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1914 case PGMPOOLKIND_PAE_PD_PHYS:
1915 case PGMPOOLKIND_PAE_PDPT_PHYS:
1916 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1917 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1918 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1919 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1920 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1921 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1922 return false;
1923
1924 /*
1925 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1926 */
1927 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1928 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1929 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1930 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1931 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1932 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1933 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1934 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1935 case PGMPOOLKIND_32BIT_PD:
1936 case PGMPOOLKIND_PAE_PDPT:
1937 switch (enmKind2)
1938 {
1939 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1940 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1941 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1942 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1943 case PGMPOOLKIND_64BIT_PML4:
1944 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1945 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1946 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1947 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1948 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1949 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1950 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1951 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1952 return true;
1953 default:
1954 return false;
1955 }
1956
1957 /*
1958 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1959 */
1960 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1961 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1962 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1963 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1964 case PGMPOOLKIND_64BIT_PML4:
1965 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1966 switch (enmKind2)
1967 {
1968 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1969 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1970 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1971 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1972 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1973 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1974 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1975 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1976 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1977 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1978 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1979 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1980 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1981 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1982 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1983 return true;
1984 default:
1985 return false;
1986 }
1987
1988 /*
1989 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1990 */
1991 case PGMPOOLKIND_ROOT_NESTED:
1992 return false;
1993
1994 default:
1995 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1996 }
1997}
1998
1999
2000/**
2001 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2002 *
2003 * @returns VBox status code.
2004 * @retval VINF_PGM_CACHED_PAGE on success.
2005 * @retval VERR_FILE_NOT_FOUND if not found.
2006 * @param pPool The pool.
2007 * @param GCPhys The GC physical address of the page we're going to shadow.
2008 * @param enmKind The kind of mapping.
2009 * @param enmAccess Access type for the mapping (only relevant for big pages)
2010 * @param iUser The shadow page pool index of the user table.
2011 * @param iUserTable The index into the user table (shadowed).
2012 * @param ppPage Where to store the pointer to the page.
2013 */
2014static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2015{
2016#ifndef IN_RC
2017 const PVM pVM = pPool->CTX_SUFF(pVM);
2018#endif
2019 /*
2020 * Look up the GCPhys in the hash.
2021 */
2022 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2023 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2024 if (i != NIL_PGMPOOL_IDX)
2025 {
2026 do
2027 {
2028 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2029 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2030 if (pPage->GCPhys == GCPhys)
2031 {
2032 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2033 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2034 {
2035 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2036 * doesn't flush it in case there are no more free use records.
2037 */
2038 pgmPoolCacheUsed(pPool, pPage);
2039
2040 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2041 if (RT_SUCCESS(rc))
2042 {
2043 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2044 *ppPage = pPage;
2045 if (pPage->cModifications)
2046 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2047 STAM_COUNTER_INC(&pPool->StatCacheHits);
2048 return VINF_PGM_CACHED_PAGE;
2049 }
2050 return rc;
2051 }
2052
2053 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2054 {
2055 /*
2056 * The kind is different. In some cases we should now flush the page
2057 * as it has been reused, but in most cases this is normal remapping
2058 * of PDs as PT or big pages using the GCPhys field in a slightly
2059 * different way than the other kinds.
2060 */
2061 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2062 {
2063 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2064 pgmPoolFlushPage(pPool, pPage);
2065 break;
2066 }
2067 }
2068 }
2069
2070 /* next */
2071 i = pPage->iNext;
2072 } while (i != NIL_PGMPOOL_IDX);
2073 }
2074
2075 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2076 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2077 return VERR_FILE_NOT_FOUND;
2078}
2079
2080
2081/**
2082 * Inserts a page into the cache.
2083 *
2084 * @param pPool The pool.
2085 * @param pPage The cached page.
2086 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2087 */
2088static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2089{
2090 /*
2091 * Insert into the GCPhys hash if the page is fit for that.
2092 */
2093 Assert(!pPage->fCached);
2094 if (fCanBeCached)
2095 {
2096 pPage->fCached = true;
2097 pgmPoolHashInsert(pPool, pPage);
2098 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2099 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2100 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2101 }
2102 else
2103 {
2104 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2105 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2106 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2107 }
2108
2109 /*
2110 * Insert at the head of the age list.
2111 */
2112 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2113 pPage->iAgeNext = pPool->iAgeHead;
2114 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2115 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2116 else
2117 pPool->iAgeTail = pPage->idx;
2118 pPool->iAgeHead = pPage->idx;
2119}
2120
2121
2122/**
2123 * Flushes a cached page.
2124 *
2125 * @param pPool The pool.
2126 * @param pPage The cached page.
2127 */
2128static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2129{
2130 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2131
2132 /*
2133 * Remove the page from the hash.
2134 */
2135 if (pPage->fCached)
2136 {
2137 pPage->fCached = false;
2138 pgmPoolHashRemove(pPool, pPage);
2139 }
2140 else
2141 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2142
2143 /*
2144 * Remove it from the age list.
2145 */
2146 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2147 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2148 else
2149 pPool->iAgeTail = pPage->iAgePrev;
2150 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2151 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2152 else
2153 pPool->iAgeHead = pPage->iAgeNext;
2154 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2155 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2156}
2157
2158
2159/**
2160 * Looks for pages sharing the monitor.
2161 *
2162 * @returns Pointer to the head page.
2163 * @returns NULL if not found.
2164 * @param pPool The pool.
2165 * @param pNewPage The page which is going to be monitored.
2166 */
2167static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2168{
2169 /*
2170 * Look up the GCPhys in the hash.
2171 */
2172 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2173 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2174 if (i == NIL_PGMPOOL_IDX)
2175 return NULL;
2176 do
2177 {
2178 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2179 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2180 && pPage != pNewPage)
2181 {
2182 switch (pPage->enmKind)
2183 {
2184 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2185 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2186 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2187 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2188 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2189 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2190 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2191 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2192 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2193 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2194 case PGMPOOLKIND_64BIT_PML4:
2195 case PGMPOOLKIND_32BIT_PD:
2196 case PGMPOOLKIND_PAE_PDPT:
2197 {
2198 /* find the head */
2199 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2200 {
2201 Assert(pPage->iMonitoredPrev != pPage->idx);
2202 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2203 }
2204 return pPage;
2205 }
2206
2207 /* ignore, no monitoring. */
2208 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2209 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2210 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2211 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2212 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2213 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2214 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2215 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2216 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2217 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2218 case PGMPOOLKIND_ROOT_NESTED:
2219 case PGMPOOLKIND_PAE_PD_PHYS:
2220 case PGMPOOLKIND_PAE_PDPT_PHYS:
2221 case PGMPOOLKIND_32BIT_PD_PHYS:
2222 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2223 break;
2224 default:
2225 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2226 }
2227 }
2228
2229 /* next */
2230 i = pPage->iNext;
2231 } while (i != NIL_PGMPOOL_IDX);
2232 return NULL;
2233}
2234
2235
2236/**
2237 * Enables write monitoring of a guest page.
2238 *
2239 * @returns VBox status code.
2240 * @retval VINF_SUCCESS on success.
2241 * @param pPool The pool.
2242 * @param pPage The cached page.
2243 */
2244static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2245{
2246 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2247
2248 /*
2249 * Filter out the relevant kinds.
2250 */
2251 switch (pPage->enmKind)
2252 {
2253 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2254 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2255 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2256 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2257 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2258 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2259 case PGMPOOLKIND_64BIT_PML4:
2260 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2261 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2262 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2263 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2264 case PGMPOOLKIND_32BIT_PD:
2265 case PGMPOOLKIND_PAE_PDPT:
2266 break;
2267
2268 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2269 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2270 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2271 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2272 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2273 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2274 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2275 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2276 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2277 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2278 case PGMPOOLKIND_ROOT_NESTED:
2279 /* Nothing to monitor here. */
2280 return VINF_SUCCESS;
2281
2282 case PGMPOOLKIND_32BIT_PD_PHYS:
2283 case PGMPOOLKIND_PAE_PDPT_PHYS:
2284 case PGMPOOLKIND_PAE_PD_PHYS:
2285 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2286 /* Nothing to monitor here. */
2287 return VINF_SUCCESS;
2288 default:
2289 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2290 }
2291
2292 /*
2293 * Install handler.
2294 */
2295 int rc;
2296 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2297 if (pPageHead)
2298 {
2299 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2300 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2301
2302#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2303 if (pPageHead->fDirty)
2304 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2305#endif
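        /* Splice the new page in right after the existing head; all pages shadowing this
         * guest page share the head's single physical access handler registration, so no
         * new handler is installed here. */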
2306
2307 pPage->iMonitoredPrev = pPageHead->idx;
2308 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2309 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2310 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2311 pPageHead->iMonitoredNext = pPage->idx;
2312 rc = VINF_SUCCESS;
2313 }
2314 else
2315 {
2316 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2317 PVM pVM = pPool->CTX_SUFF(pVM);
2318 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2319 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2320 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2321 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2322 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2323 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2324 pPool->pszAccessHandler);
2325 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2326 * the heap size should suffice. */
2327 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2328 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
2329 }
2330 pPage->fMonitored = true;
2331 return rc;
2332}
2333
2334
2335/**
2336 * Disables write monitoring of a guest page.
2337 *
2338 * @returns VBox status code.
2339 * @retval VINF_SUCCESS on success.
2340 * @param pPool The pool.
2341 * @param pPage The cached page.
2342 */
2343static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2344{
2345 /*
2346 * Filter out the relevant kinds.
2347 */
2348 switch (pPage->enmKind)
2349 {
2350 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2351 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2352 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2353 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2354 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2355 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2356 case PGMPOOLKIND_64BIT_PML4:
2357 case PGMPOOLKIND_32BIT_PD:
2358 case PGMPOOLKIND_PAE_PDPT:
2359 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2360 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2361 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2362 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2363 break;
2364
2365 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2366 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2367 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2368 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2369 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2370 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2371 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2372 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2373 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2374 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2375 case PGMPOOLKIND_ROOT_NESTED:
2376 case PGMPOOLKIND_PAE_PD_PHYS:
2377 case PGMPOOLKIND_PAE_PDPT_PHYS:
2378 case PGMPOOLKIND_32BIT_PD_PHYS:
2379 /* Nothing to monitor here. */
2380 return VINF_SUCCESS;
2381
2382 default:
2383 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2384 }
2385
2386 /*
2387 * Remove the page from the monitored list or uninstall it if last.
2388 */
2389 const PVM pVM = pPool->CTX_SUFF(pVM);
2390 int rc;
2391 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2392 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2393 {
2394 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2395 {
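            /* This page is the head of the monitored chain and thus owns the physical
             * access handler registration; promote the next page to head and re-point the
             * handler's per-context user arguments at it instead of deregistering. */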
2396 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2397 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2398 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2399 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2400 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2401 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2402 pPool->pszAccessHandler);
2403 AssertFatalRCSuccess(rc);
2404 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2405 }
2406 else
2407 {
2408 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2409 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2410 {
2411 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2412 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2413 }
2414 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2415 rc = VINF_SUCCESS;
2416 }
2417 }
2418 else
2419 {
2420 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2421 AssertFatalRC(rc);
2422#ifdef VBOX_STRICT
2423 PVMCPU pVCpu = VMMGetCpu(pVM);
2424#endif
2425 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2426 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2427 }
2428 pPage->fMonitored = false;
2429
2430 /*
2431 * Remove it from the list of modified pages (if in it).
2432 */
2433 pgmPoolMonitorModifiedRemove(pPool, pPage);
2434
2435 return rc;
2436}
2437
2438
2439/**
2440 * Inserts the page into the list of modified pages.
2441 *
2442 * @param pPool The pool.
2443 * @param pPage The page.
2444 */
2445void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2446{
2447 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2448 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2449 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2450 && pPool->iModifiedHead != pPage->idx,
2451 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2452 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2453 pPool->iModifiedHead, pPool->cModifiedPages));
2454
2455 pPage->iModifiedNext = pPool->iModifiedHead;
2456 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2457 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2458 pPool->iModifiedHead = pPage->idx;
2459 pPool->cModifiedPages++;
2460#ifdef VBOX_WITH_STATISTICS
2461 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2462 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2463#endif
2464}
2465
2466
2467/**
2468 * Removes the page from the list of modified pages and resets the
2469 * modification counter.
2470 *
2471 * @param pPool The pool.
2472 * @param pPage The page which is believed to be in the list of modified pages.
2473 */
2474static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2475{
2476 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2477 if (pPool->iModifiedHead == pPage->idx)
2478 {
2479 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2480 pPool->iModifiedHead = pPage->iModifiedNext;
2481 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2482 {
2483 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2484 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2485 }
2486 pPool->cModifiedPages--;
2487 }
2488 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2489 {
2490 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2491 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2492 {
2493 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2494 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2495 }
2496 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2497 pPool->cModifiedPages--;
2498 }
2499 else
2500 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2501 pPage->cModifications = 0;
2502}
2503
2504
2505/**
2506 * Zaps the list of modified pages, resetting their modification counters in the process.
2507 *
2508 * @param pVM The VM handle.
2509 */
2510static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2511{
2512 pgmLock(pVM);
2513 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2514 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2515
2516 unsigned cPages = 0; NOREF(cPages);
2517
2518#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2519 pgmPoolResetDirtyPages(pVM);
2520#endif
2521
2522 uint16_t idx = pPool->iModifiedHead;
2523 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2524 while (idx != NIL_PGMPOOL_IDX)
2525 {
2526 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2527 idx = pPage->iModifiedNext;
2528 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2529 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2530 pPage->cModifications = 0;
2531 Assert(++cPages);
2532 }
2533 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2534 pPool->cModifiedPages = 0;
2535 pgmUnlock(pVM);
2536}
2537
2538
2539/**
2540 * Handles SyncCR3 pool tasks.
2541 *
2542 * @returns VBox status code.
2543 * @retval VINF_SUCCESS if successful.
2544 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2545 * @param pVCpu The VMCPU handle.
2546 * @remark Should only be used when monitoring is available, thus placed in
2547 * the PGMPOOL_WITH_MONITORING #ifdef.
2548 */
2549int pgmPoolSyncCR3(PVMCPU pVCpu)
2550{
2551 PVM pVM = pVCpu->CTX_SUFF(pVM);
2552 LogFlow(("pgmPoolSyncCR3\n"));
2553
2554 /*
2555 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2556 * Occasionally we will have to clear all the shadow page tables because we wanted
2557 * to monitor a page which was mapped by too many shadowed page tables. This operation
2558 * is sometimes referred to as a 'lightweight flush'.
2559 */
2560# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2561 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2562 pgmR3PoolClearAll(pVM);
2563# else /* !IN_RING3 */
2564 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2565 {
2566 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2567 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2568
2569 /* Make sure all other VCPUs return to ring 3. */
2570 if (pVM->cCpus > 1)
2571 {
2572 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2573 PGM_INVL_ALL_VCPU_TLBS(pVM);
2574 }
2575 return VINF_PGM_SYNC_CR3;
2576 }
2577# endif /* !IN_RING3 */
2578 else
2579 pgmPoolMonitorModifiedClearAll(pVM);
2580
2581 return VINF_SUCCESS;
2582}
2583
2584
2585/**
2586 * Frees up at least one user entry.
2587 *
2588 * @returns VBox status code.
2589 * @retval VINF_SUCCESS if successfully freed.
2590 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2591 * @param pPool The pool.
2592 * @param iUser The user index.
2593 */
2594static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2595{
2596 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2597 /*
2598 * Just free cached pages in a braindead fashion.
2599 */
2600 /** @todo walk the age list backwards and free the first with usage. */
2601 int rc = VINF_SUCCESS;
2602 do
2603 {
2604 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2605 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2606 rc = rc2;
2607 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2608 return rc;
2609}
2610
2611
2612/**
2613 * Inserts a page into the cache.
2614 *
2615 * This will create user node for the page, insert it into the GCPhys
2616 * hash, and insert it into the age list.
2617 *
2618 * @returns VBox status code.
2619 * @retval VINF_SUCCESS if successfully added.
2620 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2621 * @param pPool The pool.
2622 * @param pPage The cached page.
2623 * @param GCPhys The GC physical address of the page we're going to shadow.
2624 * @param iUser The user index.
2625 * @param iUserTable The user table index.
2626 */
2627DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2628{
2629 int rc = VINF_SUCCESS;
2630 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2631
2632 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2633
2634#ifdef VBOX_STRICT
2635 /*
2636 * Check that the entry doesn't already exist.
2637 */
2638 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2639 {
2640 uint16_t i = pPage->iUserHead;
2641 do
2642 {
2643 Assert(i < pPool->cMaxUsers);
2644 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2645 i = paUsers[i].iNext;
2646 } while (i != NIL_PGMPOOL_USER_INDEX);
2647 }
2648#endif
2649
2650 /*
2651 * Find a free user node.
2652 */
2653 uint16_t i = pPool->iUserFreeHead;
2654 if (i == NIL_PGMPOOL_USER_INDEX)
2655 {
2656 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2657 if (RT_FAILURE(rc))
2658 return rc;
2659 i = pPool->iUserFreeHead;
2660 }
2661
2662 /*
2663 * Unlink the user node from the free list,
2664 * initialize and insert it into the user list.
2665 */
2666 pPool->iUserFreeHead = paUsers[i].iNext;
2667 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2668 paUsers[i].iUser = iUser;
2669 paUsers[i].iUserTable = iUserTable;
2670 pPage->iUserHead = i;
2671
2672 /*
2673 * Insert into cache and enable monitoring of the guest page if enabled.
2674 *
2675 * Until we implement caching of all levels, including the CR3 one, we'll
2676 * have to make sure we don't try monitor & cache any recursive reuse of
2677 * a monitored CR3 page. Because all Windows versions are doing this we'll
2678 * have to be able to do combined access monitoring, CR3 + PT and
2679 * PD + PT (guest PAE).
2680 *
2681 * Update:
2682 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2683 */
2684 const bool fCanBeMonitored = true;
2685 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2686 if (fCanBeMonitored)
2687 {
2688 rc = pgmPoolMonitorInsert(pPool, pPage);
2689 AssertRC(rc);
2690 }
2691 return rc;
2692}
2693
2694
2695/**
2696 * Adds a user reference to a page.
2697 *
2698 * This will move the page to the head of the cache's age list.
2699 *
2700 * @returns VBox status code.
2701 * @retval VINF_SUCCESS if successfully added.
2702 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2703 * @param pPool The pool.
2704 * @param pPage The cached page.
2705 * @param iUser The user index.
2706 * @param iUserTable The user table.
2707 */
2708static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2709{
2710 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2711
2712 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2713
2714# ifdef VBOX_STRICT
2715 /*
2716 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2717 */
2718 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2719 {
2720 uint16_t i = pPage->iUserHead;
2721 do
2722 {
2723 Assert(i < pPool->cMaxUsers);
2724 AssertMsg(iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3 ||
2725 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2726 i = paUsers[i].iNext;
2727 } while (i != NIL_PGMPOOL_USER_INDEX);
2728 }
2729# endif
2730
2731 /*
2732 * Allocate a user node.
2733 */
2734 uint16_t i = pPool->iUserFreeHead;
2735 if (i == NIL_PGMPOOL_USER_INDEX)
2736 {
2737 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2738 if (RT_FAILURE(rc))
2739 return rc;
2740 i = pPool->iUserFreeHead;
2741 }
2742 pPool->iUserFreeHead = paUsers[i].iNext;
2743
2744 /*
2745 * Initialize the user node and insert it.
2746 */
2747 paUsers[i].iNext = pPage->iUserHead;
2748 paUsers[i].iUser = iUser;
2749 paUsers[i].iUserTable = iUserTable;
2750 pPage->iUserHead = i;
2751
2752# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2753 if (pPage->fDirty)
2754 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2755# endif
2756
2757 /*
2758 * Tell the cache to update its replacement stats for this page.
2759 */
2760 pgmPoolCacheUsed(pPool, pPage);
2761 return VINF_SUCCESS;
2762}
2763
2764
2765/**
2766 * Frees a user record associated with a page.
2767 *
2768 * This does not clear the entry in the user table, it simply returns the
2769 * user record to the chain of free records.
2770 *
2771 * @param pPool The pool.
2772 * @param pPage The shadow page.
2773 * @param iUser The shadow page pool index of the user table.
2774 * @param iUserTable The index into the user table (shadowed).
2775 */
2776static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2777{
2778 /*
2779 * Unlink and free the specified user entry.
2780 */
2781 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2782
2783 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2784 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2785 uint16_t i = pPage->iUserHead;
2786 if ( i != NIL_PGMPOOL_USER_INDEX
2787 && paUsers[i].iUser == iUser
2788 && paUsers[i].iUserTable == iUserTable)
2789 {
2790 pPage->iUserHead = paUsers[i].iNext;
2791
2792 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2793 paUsers[i].iNext = pPool->iUserFreeHead;
2794 pPool->iUserFreeHead = i;
2795 return;
2796 }
2797
2798 /* General: Linear search. */
2799 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2800 while (i != NIL_PGMPOOL_USER_INDEX)
2801 {
2802 if ( paUsers[i].iUser == iUser
2803 && paUsers[i].iUserTable == iUserTable)
2804 {
2805 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2806 paUsers[iPrev].iNext = paUsers[i].iNext;
2807 else
2808 pPage->iUserHead = paUsers[i].iNext;
2809
2810 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2811 paUsers[i].iNext = pPool->iUserFreeHead;
2812 pPool->iUserFreeHead = i;
2813 return;
2814 }
2815 iPrev = i;
2816 i = paUsers[i].iNext;
2817 }
2818
2819 /* Fatal: didn't find it */
2820 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2821 iUser, iUserTable, pPage->GCPhys));
2822}
2823
2824
2825/**
2826 * Gets the entry size of a shadow table.
2827 *
2828 * @param enmKind The kind of page.
2829 *
2830 * @returns The size of the entry in bytes. That is, 4 or 8.
2831 * @returns If the kind is not for a table, an assertion is raised and 0 is
2832 * returned.
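 *
 * @remarks For example, 32-bit shadow entries (PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT,
 *          PGMPOOLKIND_32BIT_PD, ...) are 4 bytes, while PAE, AMD64 and EPT entries
 *          are 8 bytes.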
2833 */
2834DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2835{
2836 switch (enmKind)
2837 {
2838 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2839 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2840 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2841 case PGMPOOLKIND_32BIT_PD:
2842 case PGMPOOLKIND_32BIT_PD_PHYS:
2843 return 4;
2844
2845 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2846 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2847 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2848 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2849 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2850 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2851 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2852 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2853 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2854 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2855 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2856 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2857 case PGMPOOLKIND_64BIT_PML4:
2858 case PGMPOOLKIND_PAE_PDPT:
2859 case PGMPOOLKIND_ROOT_NESTED:
2860 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2861 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2862 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2863 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2864 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2865 case PGMPOOLKIND_PAE_PD_PHYS:
2866 case PGMPOOLKIND_PAE_PDPT_PHYS:
2867 return 8;
2868
2869 default:
2870 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2871 }
2872}
2873
2874
2875/**
2876 * Gets the entry size of a guest table.
2877 *
2878 * @param enmKind The kind of page.
2879 *
2880 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2881 * @returns If the kind is not for a table, an assertion is raised and 0 is
2882 * returned.
2883 */
2884DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2885{
2886 switch (enmKind)
2887 {
2888 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2889 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2890 case PGMPOOLKIND_32BIT_PD:
2891 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2892 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2893 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2894 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2895 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2896 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2897 return 4;
2898
2899 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2900 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2901 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2902 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2903 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2904 case PGMPOOLKIND_64BIT_PML4:
2905 case PGMPOOLKIND_PAE_PDPT:
2906 return 8;
2907
2908 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2909 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2910 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2911 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2912 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2913 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2914 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2915 case PGMPOOLKIND_ROOT_NESTED:
2916 case PGMPOOLKIND_PAE_PD_PHYS:
2917 case PGMPOOLKIND_PAE_PDPT_PHYS:
2918 case PGMPOOLKIND_32BIT_PD_PHYS:
2919 /** @todo can we return 0? (nobody is calling this...) */
2920 AssertFailed();
2921 return 0;
2922
2923 default:
2924 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2925 }
2926}
2927
2928
2929/**
2930 * Scans one shadow page table for mappings of a physical page.
2931 *
2932 * @returns true/false indicating removal of all relevant PTEs
2933 * @param pVM The VM handle.
2934 * @param pPhysPage The guest page in question.
2935 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
2936 * @param iShw The shadow page table.
2937 * @param cRefs The number of references made in that PT.
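 *
 * @remarks When fFlushPTEs is false and no active write handler is registered for the
 *          page, matching PTEs are made writable again instead of being zapped; when
 *          write access is monitored only the RW bit is cleared. In all other cases the
 *          PTEs are cleared and the present counters are adjusted.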
2938 */
2939static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
2940{
2941 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
2942 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2943 bool bRet = false;
2944
2945 /*
2946 * Assert sanity.
2947 */
2948 Assert(cRefs == 1);
2949 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2950 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2951
2952 /*
2953 * Then, clear the actual mappings to the page in the shadow PT.
2954 */
2955 switch (pPage->enmKind)
2956 {
2957 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2958 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2959 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2960 {
2961 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2962 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2963 uint32_t u32AndMask, u32OrMask;
2964
2965 u32AndMask = 0;
2966 u32OrMask = 0;
2967
2968 if (!fFlushPTEs)
2969 {
2970 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
2971 {
2972 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
2973 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
2974 u32OrMask = X86_PTE_RW;
2975 u32AndMask = UINT32_MAX;
2976 bRet = true;
2977 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2978 break;
2979
2980 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
2981 u32OrMask = 0;
2982 u32AndMask = ~X86_PTE_RW;
2983 bRet = true;
2984 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2985 break;
2986 default:
2987 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
2988 break;
2989 }
2990 }
2991 else
2992 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
2993
2994 /* Update the counter if we're removing references. */
2995 if (!u32AndMask)
2996 {
2997 Assert(pPage->cPresent >= cRefs);
2998 Assert(pPool->cPresent >= cRefs);
2999 pPage->cPresent -= cRefs;
3000 pPool->cPresent -= cRefs;
3001 }
3002
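            /* Scan from the first known-present entry and rewrite every PTE that maps
             * this physical page; ASMAtomicWriteSize makes each PTE update a single
             * atomic store. */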
3003 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3004 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3005 {
3006 X86PTE Pte;
3007
3008 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
3009 Pte.u = (pPT->a[i].u & u32AndMask) | u32OrMask;
3010 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3011 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3012
3013 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3014 cRefs--;
3015 if (!cRefs)
3016 return bRet;
3017 }
3018#ifdef LOG_ENABLED
3019 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3020 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3021 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3022 {
3023 Log(("i=%d cRefs=%d\n", i, cRefs--));
3024 }
3025#endif
3026 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3027 break;
3028 }
3029
3030 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3031 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3032 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3033 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3034 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3035 {
3036 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3037 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3038 uint64_t u64AndMask, u64OrMask;
3039
3040 u64OrMask = 0;
3041 u64AndMask = 0;
3042 if (!fFlushPTEs)
3043 {
3044 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3045 {
3046 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3047 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3048 u64OrMask = X86_PTE_RW;
3049 u64AndMask = UINT64_MAX;
3050 bRet = true;
3051 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3052 break;
3053
3054 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3055 u64OrMask = 0;
3056 u64AndMask = ~((uint64_t)X86_PTE_RW);
3057 bRet = true;
3058 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3059 break;
3060
3061 default:
3062 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3063 break;
3064 }
3065 }
3066 else
3067 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3068
3069 /* Update the counter if we're removing references. */
3070 if (!u64AndMask)
3071 {
3072 Assert(pPage->cPresent >= cRefs);
3073 Assert(pPool->cPresent >= cRefs);
3074 pPage->cPresent -= cRefs;
3075 pPool->cPresent -= cRefs;
3076 }
3077
3078 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3079 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3080 {
3081 X86PTEPAE Pte;
3082
3083 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3084 Pte.u = (pPT->a[i].u & u64AndMask) | u64OrMask;
3085 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3086 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3087
3088 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3089 cRefs--;
3090 if (!cRefs)
3091 return bRet;
3092 }
3093#ifdef LOG_ENABLED
3094 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3095 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3096 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3097 {
3098 Log(("i=%d cRefs=%d\n", i, cRefs--));
3099 }
3100#endif
3101 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
3102 break;
3103 }
3104
3105 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3106 {
3107 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3108 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3109 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3110 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3111 {
3112 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3113 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3114 pPT->a[i].u = 0;
3115 cRefs--;
3116
3117 /* Update the counter as we're removing references. */
3118 Assert(pPage->cPresent);
3119 Assert(pPool->cPresent);
3120 pPage->cPresent--;
3121 pPool->cPresent--;
3122
3123 if (!cRefs)
3124 return bRet;
3125 }
3126#ifdef LOG_ENABLED
3127 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3128 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3129 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3130 {
3131 Log(("i=%d cRefs=%d\n", i, cRefs--));
3132 }
3133#endif
3134 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3135 break;
3136 }
3137
3138#ifdef PGM_WITH_LARGE_PAGES
3139 /* Large page case only. */
3140 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3141 {
3142 Assert(HWACCMIsNestedPagingActive(pVM));
3143 Assert(cRefs == 1);
3144
3145 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3146 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3147 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPD->a); i++)
3148 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3149 {
3150 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", i, pPD->a[i], cRefs));
3151 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3152 pPD->a[i].u = 0;
3153 cRefs--;
3154
3155 /* Update the counter as we're removing references. */
3156 Assert(pPage->cPresent);
3157 Assert(pPool->cPresent);
3158 pPage->cPresent--;
3159 pPool->cPresent--;
3160
3161 if (!cRefs)
3162 return bRet;
3163 }
3164# ifdef LOG_ENABLED
3165 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3166 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3167 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3168 {
3169 Log(("i=%d cRefs=%d\n", i, cRefs--));
3170 }
3171# endif
3172 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3173 break;
3174 }
3175
3176 /* AMD-V nested paging - @todo merge with EPT as we only check the parts that are identical. */
3177 case PGMPOOLKIND_PAE_PD_PHYS:
3178 {
3179 Assert(HWACCMIsNestedPagingActive(pVM));
3180 Assert(cRefs == 1);
3181
3182 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3183 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3184 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPD->a); i++)
3185 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3186 {
3187 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", i, pPD->a[i], cRefs));
3188 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3189 pPD->a[i].u = 0;
3190 cRefs--;
3191
3192 /* Update the counter as we're removing references. */
3193 Assert(pPage->cPresent);
3194 Assert(pPool->cPresent);
3195 pPage->cPresent--;
3196 pPool->cPresent--;
3197
3198 if (!cRefs)
3199 return bRet;
3200 }
3201# ifdef LOG_ENABLED
3202 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3203 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3204 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3205 {
3206 Log(("i=%d cRefs=%d\n", i, cRefs--));
3207 }
3208# endif
3209 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3210 break;
3211 }
3212#endif /* PGM_WITH_LARGE_PAGES */
3213
3214 default:
3215 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3216 }
3217 return bRet;
3218}
3219
3220
3221/**
3222 * Scans one shadow page table for mappings of a physical page.
3223 *
3224 * @param pVM The VM handle.
3225 * @param pPhysPage The guest page in question.
3226 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3227 * @param iShw The shadow page table.
3228 * @param cRefs The number of references made in that PT.
3229 */
3230static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
3231{
3232 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3233
3234 /* We should only come here when there's only one reference to this physical page. */
3235 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3236 Assert(cRefs == 1);
3237
3238 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
3239 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3240 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, cRefs);
3241 if (!fKeptPTEs)
3242 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3243 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3244}
3245
3246
3247/**
3248 * Flushes a list of shadow page tables mapping the same physical page.
3249 *
3250 * @param pVM The VM handle.
3251 * @param pPhysPage The guest page in question.
3252 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3253 * @param iPhysExt The physical cross reference extent list to flush.
3254 */
3255static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3256{
3257 Assert(PGMIsLockOwner(pVM));
3258 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3259 bool fKeepList = false;
3260
3261 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3262    Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3263
3264 const uint16_t iPhysExtStart = iPhysExt;
3265 PPGMPOOLPHYSEXT pPhysExt;
3266 do
3267 {
3268 Assert(iPhysExt < pPool->cMaxPhysExts);
3269 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3270 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3271 {
3272 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3273 {
3274 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], 1);
3275 if (!fKeptPTEs)
3276 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3277 else
3278 fKeepList = true;
3279 }
3280 }
3281 /* next */
3282 iPhysExt = pPhysExt->iNext;
3283 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3284
3285 if (!fKeepList)
3286 {
3287 /* insert the list into the free list and clear the ram range entry. */
3288 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3289 pPool->iPhysExtFreeHead = iPhysExtStart;
3290 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3291 }
3292
3293 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3294}
3295
3296
3297/**
3298 * Flushes all shadow page table mappings of the given guest page.
3299 *
3300 * This is typically called when the host page backing the guest one has been
3301 * replaced or when the page protection was changed due to an access handler.
3302 *
3303 * @returns VBox status code.
3304 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3305 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3306 * pool cleaning. FF and sync flags are set.
3307 *
3308 * @param pVM The VM handle.
3309 * @param GCPhysPage GC physical address of the page in question
3310 * @param pPhysPage The guest page in question.
3311 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3312 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3313 * flushed, it is NOT touched if this isn't necessary.
3314 *                          The caller MUST initialize this to @a false.
3315 */
3316int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3317{
3318 PVMCPU pVCpu = VMMGetCpu(pVM);
3319 pgmLock(pVM);
3320 int rc = VINF_SUCCESS;
3321
3322#ifdef PGM_WITH_LARGE_PAGES
3323 /* Is this page part of a large page? */
3324 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3325 {
3326 PPGMPAGE pPhysBase;
3327 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3328
3329 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3330
3331 /* Fetch the large page base. */
3332 if (GCPhysBase != GCPhysPage)
3333 {
3334 pPhysBase = pgmPhysGetPage(&pVM->pgm.s, GCPhysBase);
3335 AssertFatal(pPhysBase);
3336 }
3337 else
3338 pPhysBase = pPhysPage;
3339
3340 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3341
3342 if (PGM_PAGE_GET_PDE_TYPE(pPhysBase) == PGM_PAGE_PDE_TYPE_PDE)
3343 {
3344 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3345 PGM_PAGE_SET_PDE_TYPE(pPhysBase, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3346
3347            /* Update the base page, as *only* that one has a reference and there's only one PDE to clear. */
3348 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pPhysBase, fFlushPTEs, pfFlushTLBs);
3349
3350 *pfFlushTLBs = true;
3351 pgmUnlock(pVM);
3352 return rc;
3353 }
3354 }
3355#else
3356 NOREF(GCPhysPage);
3357#endif /* PGM_WITH_LARGE_PAGES */
3358
3359 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3360 if (u16)
3361 {
3362 /*
3363 * The zero page is currently screwing up the tracking and we'll
3364 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3365 * is defined, zero pages won't normally be mapped. Some kind of solution
3366 * will be needed for this problem of course, but it will have to wait...
3367 */
3368 if (PGM_PAGE_IS_ZERO(pPhysPage))
3369 rc = VINF_PGM_GCPHYS_ALIASED;
3370 else
3371 {
3372# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3373 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3374 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3375 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3376# endif
3377
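            /* The tracking word either holds a direct (cRefs, PT index) pair, an
               index into the physical extent lists, or the 'overflowed' marker.
               Pick the cheapest flush accordingly: a single page table, the list
               of page tables, or the slow scan over the whole pool. */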
3378 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3379 pgmPoolTrackFlushGCPhysPT(pVM,
3380 pPhysPage,
3381 fFlushPTEs,
3382 PGMPOOL_TD_GET_IDX(u16),
3383 PGMPOOL_TD_GET_CREFS(u16));
3384 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3385 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3386 else
3387 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3388 *pfFlushTLBs = true;
3389
3390# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3391 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3392# endif
3393 }
3394 }
3395
3396 if (rc == VINF_PGM_GCPHYS_ALIASED)
3397 {
3398 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3399 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3400 rc = VINF_PGM_SYNC_CR3;
3401 }
3402 pgmUnlock(pVM);
3403 return rc;
3404}
3405
3406
3407/**
3408 * Scans all shadow page tables for mappings of a physical page.
3409 *
3410 * This may be slow, but it's most likely more efficient than cleaning
3411 * out the entire page pool / cache.
3412 *
3413 * @returns VBox status code.
3414 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3415 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3416 * a page pool cleaning.
3417 *
3418 * @param pVM The VM handle.
3419 * @param pPhysPage The guest page in question.
3420 */
3421int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3422{
3423 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3424 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3425 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3426 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3427
3428#if 1
3429 /*
3430 * There is a limit to what makes sense.
3431 */
3432 if (pPool->cPresent > 1024)
3433 {
3434 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3435 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3436 return VINF_PGM_GCPHYS_ALIASED;
3437 }
3438#endif
3439
3440 /*
3441      * Iterate all the pages until we've encountered all those in use.
3442      * This is a simple but not quite optimal solution.
3443 */
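    /* The PTE patterns we look for: the page's host physical address with the
       present bit set, as a 64-bit (PAE/EPT) and a 32-bit page table entry. */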
3444 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3445 const uint32_t u32 = u64;
3446 unsigned cLeft = pPool->cUsedPages;
3447 unsigned iPage = pPool->cCurPages;
3448 while (--iPage >= PGMPOOL_IDX_FIRST)
3449 {
3450 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3451 if (pPage->GCPhys != NIL_RTGCPHYS)
3452 {
3453 switch (pPage->enmKind)
3454 {
3455 /*
3456 * We only care about shadow page tables.
3457 */
3458 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3459 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3460 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3461 {
3462 unsigned cPresent = pPage->cPresent;
3463 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3464 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3465 if (pPT->a[i].n.u1Present)
3466 {
3467 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3468 {
3469 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3470 pPT->a[i].u = 0;
3471
3472 /* Update the counter as we're removing references. */
3473 Assert(pPage->cPresent);
3474 Assert(pPool->cPresent);
3475 pPage->cPresent--;
3476 pPool->cPresent--;
3477 }
3478 if (!--cPresent)
3479 break;
3480 }
3481 break;
3482 }
3483
3484 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3485 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3486 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3487 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3488 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3489 {
3490 unsigned cPresent = pPage->cPresent;
3491 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3492 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3493 if (pPT->a[i].n.u1Present)
3494 {
3495 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3496 {
3497 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3498 pPT->a[i].u = 0;
3499
3500 /* Update the counter as we're removing references. */
3501 Assert(pPage->cPresent);
3502 Assert(pPool->cPresent);
3503 pPage->cPresent--;
3504 pPool->cPresent--;
3505 }
3506 if (!--cPresent)
3507 break;
3508 }
3509 break;
3510 }
3511#ifndef IN_RC
3512 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3513 {
3514 unsigned cPresent = pPage->cPresent;
3515 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3516 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3517 if (pPT->a[i].n.u1Present)
3518 {
3519 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3520 {
3521 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3522 pPT->a[i].u = 0;
3523
3524 /* Update the counter as we're removing references. */
3525 Assert(pPage->cPresent);
3526 Assert(pPool->cPresent);
3527 pPage->cPresent--;
3528 pPool->cPresent--;
3529 }
3530 if (!--cPresent)
3531 break;
3532 }
3533 break;
3534 }
3535#endif
3536 }
3537 if (!--cLeft)
3538 break;
3539 }
3540 }
3541
3542 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3543 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3544 return VINF_SUCCESS;
3545}
3546
3547
3548/**
3549 * Clears the user entry in a user table.
3550 *
3551 * This is used to remove all references to a page when flushing it.
3552 */
3553static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3554{
3555 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3556 Assert(pUser->iUser < pPool->cCurPages);
3557 uint32_t iUserTable = pUser->iUserTable;
3558
3559 /*
3560 * Map the user page.
3561 */
3562 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3563 union
3564 {
3565 uint64_t *pau64;
3566 uint32_t *pau32;
3567 } u;
3568 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3569
3570 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3571
3572 /* Safety precaution in case we change the paging for other modes too in the future. */
3573 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3574
3575#ifdef VBOX_STRICT
3576 /*
3577 * Some sanity checks.
3578 */
3579 switch (pUserPage->enmKind)
3580 {
3581 case PGMPOOLKIND_32BIT_PD:
3582 case PGMPOOLKIND_32BIT_PD_PHYS:
3583 Assert(iUserTable < X86_PG_ENTRIES);
3584 break;
3585 case PGMPOOLKIND_PAE_PDPT:
3586 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3587 case PGMPOOLKIND_PAE_PDPT_PHYS:
3588 Assert(iUserTable < 4);
3589 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3590 break;
3591 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3592 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3593 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3594 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3595 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3596 case PGMPOOLKIND_PAE_PD_PHYS:
3597 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3598 break;
3599 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3600 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3601 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3602 break;
3603 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3604 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3605 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3606 break;
3607 case PGMPOOLKIND_64BIT_PML4:
3608 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3609 /* GCPhys >> PAGE_SHIFT is the index here */
3610 break;
3611 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3612 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3613 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3614 break;
3615
3616 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3617 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3618 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3619 break;
3620
3621 case PGMPOOLKIND_ROOT_NESTED:
3622 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3623 break;
3624
3625 default:
3626 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3627 break;
3628 }
3629#endif /* VBOX_STRICT */
3630
3631 /*
3632 * Clear the entry in the user page.
3633 */
3634 switch (pUserPage->enmKind)
3635 {
3636 /* 32-bit entries */
3637 case PGMPOOLKIND_32BIT_PD:
3638 case PGMPOOLKIND_32BIT_PD_PHYS:
3639 ASMAtomicWriteSize(&u.pau32[iUserTable], 0);
3640 break;
3641
3642 /* 64-bit entries */
3643 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3644 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3645 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3646 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3647 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3648#if defined(IN_RC)
3649        /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during CR3 load, so any
3650         * non-present PDPT entry will continue to cause page faults.
3651 */
3652 ASMReloadCR3();
3653#endif
3654 /* no break */
3655 case PGMPOOLKIND_PAE_PD_PHYS:
3656 case PGMPOOLKIND_PAE_PDPT_PHYS:
3657 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3658 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3659 case PGMPOOLKIND_64BIT_PML4:
3660 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3661 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3662 case PGMPOOLKIND_PAE_PDPT:
3663 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3664 case PGMPOOLKIND_ROOT_NESTED:
3665 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3666 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3667 ASMAtomicWriteSize(&u.pau64[iUserTable], 0);
3668 break;
3669
3670 default:
3671 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3672 }
3673}
3674
3675
3676/**
3677 * Clears all users of a page.
3678 */
3679static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3680{
3681 /*
3682 * Free all the user records.
3683 */
3684 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3685
3686 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3687 uint16_t i = pPage->iUserHead;
3688 while (i != NIL_PGMPOOL_USER_INDEX)
3689 {
3690        /* Clear the entry in the user table. */
3691 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3692
3693 /* Free it. */
3694 const uint16_t iNext = paUsers[i].iNext;
3695 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3696 paUsers[i].iNext = pPool->iUserFreeHead;
3697 pPool->iUserFreeHead = i;
3698
3699 /* Next. */
3700 i = iNext;
3701 }
3702 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3703}
3704
3705
3706/**
3707 * Allocates a new physical cross reference extent.
3708 *
3709 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3710 * @param pVM The VM handle.
3711 * @param piPhysExt Where to store the phys ext index.
3712 */
3713PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3714{
3715 Assert(PGMIsLockOwner(pVM));
3716 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3717 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3718 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3719 {
3720 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3721 return NULL;
3722 }
3723 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3724 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3725 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3726 *piPhysExt = iPhysExt;
3727 return pPhysExt;
3728}
3729
3730
3731/**
3732 * Frees a physical cross reference extent.
3733 *
3734 * @param pVM The VM handle.
3735 * @param iPhysExt The extent to free.
3736 */
3737void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3738{
3739 Assert(PGMIsLockOwner(pVM));
3740 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3741 Assert(iPhysExt < pPool->cMaxPhysExts);
3742 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3743 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3744 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3745 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3746 pPool->iPhysExtFreeHead = iPhysExt;
3747}
3748
3749
3750/**
3751 * Frees a list of physical cross reference extents.
3752 *
3753 * @param   pVM         The VM handle.
3754 * @param   iPhysExt    The index of the list head; the whole chain is freed.
3755 */
3756void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3757{
3758 Assert(PGMIsLockOwner(pVM));
3759 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3760
3761 const uint16_t iPhysExtStart = iPhysExt;
3762 PPGMPOOLPHYSEXT pPhysExt;
3763 do
3764 {
3765 Assert(iPhysExt < pPool->cMaxPhysExts);
3766 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3767 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3768 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3769
3770 /* next */
3771 iPhysExt = pPhysExt->iNext;
3772 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3773
3774 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3775 pPool->iPhysExtFreeHead = iPhysExtStart;
3776}
3777
3778
3779/**
3780 * Insert a reference into a list of physical cross reference extents.
3781 *
3782 * @returns The new tracking data for PGMPAGE.
3783 *
3784 * @param pVM The VM handle.
3785 * @param iPhysExt The physical extent index of the list head.
3786 * @param iShwPT The shadow page table index.
3787 *
3788 */
3789static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3790{
3791 Assert(PGMIsLockOwner(pVM));
3792 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3793 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3794
3795 /* special common case. */
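    /* (When a direct reference is converted to an extent list, slots 0 and 1 of
        the head are filled by pgmPoolTrackPhysExtAddref, so slot 2 is typically
        the first free one.) */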
3796 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3797 {
3798 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3799 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3800 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3801 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3802 }
3803
3804 /* general treatment. */
3805 const uint16_t iPhysExtStart = iPhysExt;
3806 unsigned cMax = 15;
3807 for (;;)
3808 {
3809 Assert(iPhysExt < pPool->cMaxPhysExts);
3810 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3811 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3812 {
3813 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3814 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3815 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3816 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3817 }
3818 if (!--cMax)
3819 {
3820 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3821 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3822 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3823 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3824         }
             /* Advance to the next extent in the chain; if we have reached the
                end, fall out of the loop and append a new extent below. */
             iPhysExt = paPhysExts[iPhysExt].iNext;
             if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
                 break;
3825     }
3826
3827 /* add another extent to the list. */
3828 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3829 if (!pNew)
3830 {
3831 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3832 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3833 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3834 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3835 }
3836 pNew->iNext = iPhysExtStart;
3837 pNew->aidx[0] = iShwPT;
3838 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3839 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3840}
3841
3842
3843/**
3844 * Add a reference to guest physical page where extents are in use.
3845 *
3846 * @returns The new tracking data for PGMPAGE.
3847 *
3848 * @param pVM The VM handle.
3849 * @param u16 The ram range flags (top 16-bits).
3850 * @param iShwPT The shadow page table index.
3851 */
3852uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3853{
3854 pgmLock(pVM);
3855 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3856 {
3857 /*
3858 * Convert to extent list.
3859 */
3860 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3861 uint16_t iPhysExt;
3862 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3863 if (pPhysExt)
3864 {
3865 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3866 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3867 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3868 pPhysExt->aidx[1] = iShwPT;
3869 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3870 }
3871 else
3872 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3873 }
3874 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3875 {
3876 /*
3877 * Insert into the extent list.
3878 */
3879 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3880 }
3881 else
3882 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3883 pgmUnlock(pVM);
3884 return u16;
3885}
3886
3887
3888/**
3889 * Clear references to guest physical memory.
3890 *
3891 * @param pPool The pool.
3892 * @param pPage The page.
3893 * @param pPhysPage Pointer to the aPages entry in the ram range.
3894 */
3895void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3896{
3897 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3898 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3899
3900 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3901 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3902 {
3903 PVM pVM = pPool->CTX_SUFF(pVM);
3904 pgmLock(pVM);
3905
3906 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3907 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3908 do
3909 {
3910 Assert(iPhysExt < pPool->cMaxPhysExts);
3911
3912 /*
3913 * Look for the shadow page and check if it's all freed.
3914 */
3915 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3916 {
3917 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3918 {
3919 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3920
3921 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3922 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3923 {
3924 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3925 pgmUnlock(pVM);
3926 return;
3927 }
3928
3929 /* we can free the node. */
3930 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3931 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3932 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3933 {
3934 /* lonely node */
3935 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3936 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3937 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3938 }
3939 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3940 {
3941 /* head */
3942 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3943 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3944 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3945 }
3946 else
3947 {
3948 /* in list */
3949 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3950 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3951 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3952 }
3953 iPhysExt = iPhysExtNext;
3954 pgmUnlock(pVM);
3955 return;
3956 }
3957 }
3958
3959 /* next */
3960 iPhysExtPrev = iPhysExt;
3961 iPhysExt = paPhysExts[iPhysExt].iNext;
3962 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3963
3964 pgmUnlock(pVM);
3965 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3966 }
3967 else /* nothing to do */
3968 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3969}
3970
3971
3972/**
3973 * Clear references to guest physical memory.
3974 *
3975 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3976 * is assumed to be correct, so the linear search can be skipped and we can assert
3977 * at an earlier point.
3978 *
3979 * @param pPool The pool.
3980 * @param pPage The page.
3981 * @param HCPhys The host physical address corresponding to the guest page.
3982 * @param GCPhys The guest physical address corresponding to HCPhys.
3983 */
3984static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3985{
3986 /*
3987 * Walk range list.
3988 */
3989 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3990 while (pRam)
3991 {
3992 RTGCPHYS off = GCPhys - pRam->GCPhys;
3993 if (off < pRam->cb)
3994 {
3995 /* does it match? */
3996 const unsigned iPage = off >> PAGE_SHIFT;
3997 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3998#ifdef LOG_ENABLED
3999 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
4000 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4001#endif
4002 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4003 {
4004 Assert(pPage->cPresent);
4005 Assert(pPool->cPresent);
4006 pPage->cPresent--;
4007 pPool->cPresent--;
4008 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
4009 return;
4010 }
4011 break;
4012 }
4013 pRam = pRam->CTX_SUFF(pNext);
4014 }
4015 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4016}
4017
4018
4019/**
4020 * Clear references to guest physical memory.
4021 *
4022 * @param pPool The pool.
4023 * @param pPage The page.
4024 * @param HCPhys The host physical address corresponding to the guest page.
4025 * @param   GCPhysHint  The guest physical address which may correspond to HCPhys.
4026 */
4027static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
4028{
4029 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4030
4031 /*
4032 * Walk range list.
4033 */
4034 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4035 while (pRam)
4036 {
4037 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
4038 if (off < pRam->cb)
4039 {
4040 /* does it match? */
4041 const unsigned iPage = off >> PAGE_SHIFT;
4042 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4043 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4044 {
4045 Assert(pPage->cPresent);
4046 Assert(pPool->cPresent);
4047 pPage->cPresent--;
4048 pPool->cPresent--;
4049 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
4050 return;
4051 }
4052 break;
4053 }
4054 pRam = pRam->CTX_SUFF(pNext);
4055 }
4056
4057 /*
4058 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4059 */
4060 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4061 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4062 while (pRam)
4063 {
4064 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4065 while (iPage-- > 0)
4066 {
4067 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4068 {
4069 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4070 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4071 Assert(pPage->cPresent);
4072 Assert(pPool->cPresent);
4073 pPage->cPresent--;
4074 pPool->cPresent--;
4075 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
4076 return;
4077 }
4078 }
4079 pRam = pRam->CTX_SUFF(pNext);
4080 }
4081
4082 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
4083}
4084
4085
4086/**
4087 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4088 *
4089 * @param pPool The pool.
4090 * @param pPage The page.
4091 * @param pShwPT The shadow page table (mapping of the page).
4092 * @param pGstPT The guest page table.
4093 */
4094DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4095{
4096 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4097 if (pShwPT->a[i].n.u1Present)
4098 {
4099 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4100 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4101 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
4102 if (!pPage->cPresent)
4103 break;
4104 }
4105}
4106
4107
4108/**
4109 * Clear references to guest physical memory in a PAE / 32-bit page table.
4110 *
4111 * @param pPool The pool.
4112 * @param pPage The page.
4113 * @param pShwPT The shadow page table (mapping of the page).
4114 * @param pGstPT The guest page table (just a half one).
4115 */
4116DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
4117{
4118 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4119 if (pShwPT->a[i].n.u1Present)
4120 {
4121 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4122 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4123 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
4124 if (!pPage->cPresent)
4125 break;
4126 }
4127}
4128
4129
4130/**
4131 * Clear references to guest physical memory in a PAE / PAE page table.
4132 *
4133 * @param pPool The pool.
4134 * @param pPage The page.
4135 * @param pShwPT The shadow page table (mapping of the page).
4136 * @param pGstPT The guest page table.
4137 */
4138DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
4139{
4140 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4141 if (pShwPT->a[i].n.u1Present)
4142 {
4143            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4144 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4145 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
4146 if (!pPage->cPresent)
4147 break;
4148 }
4149}
4150
4151
4152/**
4153 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4154 *
4155 * @param pPool The pool.
4156 * @param pPage The page.
4157 * @param pShwPT The shadow page table (mapping of the page).
4158 */
4159DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4160{
4161 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4162 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4163 if (pShwPT->a[i].n.u1Present)
4164 {
4165 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4166 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4167 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
4168 if (!pPage->cPresent)
4169 break;
4170 }
4171}
4172
4173
4174/**
4175 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4176 *
4177 * @param pPool The pool.
4178 * @param pPage The page.
4179 * @param pShwPT The shadow page table (mapping of the page).
4180 */
4181DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
4182{
4183 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4184 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4185 if (pShwPT->a[i].n.u1Present)
4186 {
4187 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4188 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
4189 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
4190 if (!pPage->cPresent)
4191 break;
4192 }
4193}
4194
4195
4196/**
4197 * Clear references to shadowed pages in an EPT page table.
4198 *
4199 * @param pPool The pool.
4200 * @param pPage The page.
4201 * @param   pShwPT      The shadow page table (mapping of the page).
4202 */
4203DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4204{
4205 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4206 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4207 if (pShwPT->a[i].n.u1Present)
4208 {
4209 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4210 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4211 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
4212 if (!pPage->cPresent)
4213 break;
4214 }
4215}
4216
4217
4218
4219/**
4220 * Clear references to shadowed pages in a 32-bit page directory.
4221 *
4222 * @param pPool The pool.
4223 * @param pPage The page.
4224 * @param pShwPD The shadow page directory (mapping of the page).
4225 */
4226DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4227{
4228 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4229 {
4230 if ( pShwPD->a[i].n.u1Present
4231 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4232 )
4233 {
4234 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4235 if (pSubPage)
4236 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4237 else
4238 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4239 }
4240 }
4241}
4242
4243/**
4244 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4245 *
4246 * @param pPool The pool.
4247 * @param pPage The page.
4248 * @param pShwPD The shadow page directory (mapping of the page).
4249 */
4250DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4251{
4252 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4253 {
4254 if ( pShwPD->a[i].n.u1Present
4255 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4256 )
4257 {
4258#ifdef PGM_WITH_LARGE_PAGES
4259 if (pShwPD->a[i].b.u1Size)
4260 {
4261 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4262 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4263 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */);
4264 }
4265 else
4266#endif
4267 {
4268 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4269 if (pSubPage)
4270 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4271 else
4272 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4273 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4274 }
4275 }
4276 }
4277}
4278
4279/**
4280 * Clear references to shadowed pages in a PAE page directory pointer table.
4281 *
4282 * @param pPool The pool.
4283 * @param pPage The page.
4284 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4285 */
4286DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4287{
4288 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4289 {
4290 if ( pShwPDPT->a[i].n.u1Present
4291 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4292 )
4293 {
4294 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4295 if (pSubPage)
4296 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4297 else
4298 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4299 }
4300 }
4301}
4302
4303
4304/**
4305 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4306 *
4307 * @param pPool The pool.
4308 * @param pPage The page.
4309 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4310 */
4311DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4312{
4313 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4314 {
4315 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4316 if (pShwPDPT->a[i].n.u1Present)
4317 {
4318 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4319 if (pSubPage)
4320 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4321 else
4322 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4323 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4324 }
4325 }
4326}
4327
4328
4329/**
4330 * Clear references to shadowed pages in a 64-bit level 4 page table.
4331 *
4332 * @param pPool The pool.
4333 * @param pPage The page.
4334 * @param   pShwPML4    The shadow PML4 (mapping of the page).
4335 */
4336DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4337{
4338 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4339 {
4340 if (pShwPML4->a[i].n.u1Present)
4341 {
4342 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4343 if (pSubPage)
4344 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4345 else
4346 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4347 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4348 }
4349 }
4350}
4351
4352
4353/**
4354 * Clear references to shadowed pages in an EPT page directory.
4355 *
4356 * @param pPool The pool.
4357 * @param pPage The page.
4358 * @param pShwPD The shadow page directory (mapping of the page).
4359 */
4360DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4361{
4362 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4363 {
4364 if (pShwPD->a[i].n.u1Present)
4365 {
4366#ifdef PGM_WITH_LARGE_PAGES
4367 if (pShwPD->a[i].b.u1Size)
4368 {
4369 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4370 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4371 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */);
4372 }
4373 else
4374#endif
4375 {
4376 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4377 if (pSubPage)
4378 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4379 else
4380 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4381 }
4382 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4383 }
4384 }
4385}
4386
4387
4388/**
4389 * Clear references to shadowed pages in an EPT page directory pointer table.
4390 *
4391 * @param pPool The pool.
4392 * @param pPage The page.
4393 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4394 */
4395DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4396{
4397 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4398 {
4399 if (pShwPDPT->a[i].n.u1Present)
4400 {
4401 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4402 if (pSubPage)
4403 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4404 else
4405 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4406 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4407 }
4408 }
4409}
4410
4411
4412/**
4413 * Clears all references made by this page.
4414 *
4415 * This includes other shadow pages and GC physical addresses.
4416 *
4417 * @param pPool The pool.
4418 * @param pPage The page.
4419 */
4420static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4421{
4422 /*
4423 * Map the shadow page and take action according to the page kind.
4424 */
4425 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
4426 switch (pPage->enmKind)
4427 {
4428 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4429 {
4430 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4431 void *pvGst;
4432 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4433 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4434 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4435 break;
4436 }
4437
4438 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4439 {
4440 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4441 void *pvGst;
4442 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4443 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4444 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4445 break;
4446 }
4447
4448 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4449 {
4450 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4451 void *pvGst;
4452 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4453 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4454 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4455 break;
4456 }
4457
4458 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4459 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4460 {
4461 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4462 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4463 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4464 break;
4465 }
4466
4467 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4468 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4469 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4470 {
4471 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4472 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4473 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4474 break;
4475 }
4476
4477 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4478 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4479 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4480 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4481 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4482 case PGMPOOLKIND_PAE_PD_PHYS:
4483 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4484 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4485 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4486 break;
4487
4488 case PGMPOOLKIND_32BIT_PD_PHYS:
4489 case PGMPOOLKIND_32BIT_PD:
4490 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4491 break;
4492
4493 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4494 case PGMPOOLKIND_PAE_PDPT:
4495 case PGMPOOLKIND_PAE_PDPT_PHYS:
4496 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4497 break;
4498
4499 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4500 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4501 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4502 break;
4503
4504 case PGMPOOLKIND_64BIT_PML4:
4505 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4506 break;
4507
4508 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4509 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4510 break;
4511
4512 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4513 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4514 break;
4515
4516 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4517 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4518 break;
4519
4520 default:
4521 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4522 }
4523
4524    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4525 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4526 ASMMemZeroPage(pvShw);
4527 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4528 pPage->fZeroed = true;
4529 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4530}
4531
4532/**
4533 * Flushes a pool page.
4534 *
4535 * This moves the page to the free list after removing all user references to it.
4536 *
4537 * @returns VBox status code.
4538 * @retval VINF_SUCCESS on success.
4539 * @param pPool The pool.
4540 * @param   pPage       The shadow page.
4541 * @param   fFlush      Flush the TLBs when required (should only be false in very specific use cases!!)
4542 */
4543int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4544{
4545 PVM pVM = pPool->CTX_SUFF(pVM);
4546 bool fFlushRequired = false;
4547
4548 int rc = VINF_SUCCESS;
4549 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4550 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4551 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4552
4553 /*
4554 * Quietly reject any attempts at flushing any of the special root pages.
4555 */
4556 if (pPage->idx < PGMPOOL_IDX_FIRST)
4557 {
4558 AssertFailed(); /* can no longer happen */
4559 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4560 return VINF_SUCCESS;
4561 }
4562
4563 pgmLock(pVM);
4564
4565 /*
4566 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4567 */
4568 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4569 {
4570 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4571 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4572 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4573 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4574 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4575 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4576 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4577 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4578 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4579 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4580 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4581 pgmUnlock(pVM);
4582 return VINF_SUCCESS;
4583 }
4584
4585#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4586 /* Start a subset so we won't run out of mapping space. */
4587 PVMCPU pVCpu = VMMGetCpu(pVM);
4588 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4589#endif
4590
4591 /*
4592 * Mark the page as being in need of an ASMMemZeroPage().
4593 */
4594 pPage->fZeroed = false;
4595
4596#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4597 if (pPage->fDirty)
4598 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4599#endif
4600
4601 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4602 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4603 fFlushRequired = true;
4604
4605 /*
4606 * Clear the page.
4607 */
4608 pgmPoolTrackClearPageUsers(pPool, pPage);
4609 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4610 pgmPoolTrackDeref(pPool, pPage);
4611 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4612
4613 /*
4614 * Flush it from the cache.
4615 */
4616 pgmPoolCacheFlushPage(pPool, pPage);
4617
4618#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4619 /* Heavy stuff done. */
4620 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4621#endif
4622
4623 /*
4624      * Deregister the monitoring.
4625 */
4626 if (pPage->fMonitored)
4627 rc = pgmPoolMonitorFlush(pPool, pPage);
4628
4629 /*
4630 * Free the page.
4631 */
4632 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4633 pPage->iNext = pPool->iFreeHead;
4634 pPool->iFreeHead = pPage->idx;
4635 pPage->enmKind = PGMPOOLKIND_FREE;
4636 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4637 pPage->GCPhys = NIL_RTGCPHYS;
4638 pPage->fReusedFlushPending = false;
4639
4640 pPool->cUsedPages--;
4641
4642 /* Flush the TLBs of all VCPUs if required. */
4643 if ( fFlushRequired
4644 && fFlush)
4645 {
4646 PGM_INVL_ALL_VCPU_TLBS(pVM);
4647 }
4648
4649 pgmUnlock(pVM);
4650 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4651 return rc;
4652}
4653
4654
4655/**
4656 * Frees a usage of a pool page.
4657 *
4658 * The caller is responsible for updating the user table so that it no longer
4659 * references the shadow page.
4660 *
4661 * @param pPool The pool.
4662 * @param   pPage       The shadow page.
4663 * @param iUser The shadow page pool index of the user table.
4664 * @param iUserTable The index into the user table (shadowed).
4665 */
4666void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4667{
4668 PVM pVM = pPool->CTX_SUFF(pVM);
4669
4670 STAM_PROFILE_START(&pPool->StatFree, a);
4671 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4672 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4673 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4674 pgmLock(pVM);
4675 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4676 if (!pPage->fCached)
4677 pgmPoolFlushPage(pPool, pPage);
4678 pgmUnlock(pVM);
4679 STAM_PROFILE_STOP(&pPool->StatFree, a);
4680}
4681
4682
4683/**
4684 * Makes one or more pages free, either by growing the pool or by freeing a cached page.
4685 *
4686 * @returns VBox status code.
4687 * @retval VINF_SUCCESS on success.
4688 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4689 *
4690 * @param pPool The pool.
4691 * @param enmKind Page table kind
4692 * @param iUser The user of the page.
4693 */
4694static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4695{
4696 PVM pVM = pPool->CTX_SUFF(pVM);
4697
4698 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4699
4700 /*
4701     * If the pool isn't fully grown yet, expand it.
4702 */
4703 if ( pPool->cCurPages < pPool->cMaxPages
4704#if defined(IN_RC)
4705 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4706 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4707 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4708#endif
4709 )
4710 {
4711 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4712#ifdef IN_RING3
4713 int rc = PGMR3PoolGrow(pVM);
4714#else
4715 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4716#endif
4717 if (RT_FAILURE(rc))
4718 return rc;
4719 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4720 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4721 return VINF_SUCCESS;
4722 }
4723
4724 /*
4725 * Free one cached page.
4726 */
4727 return pgmPoolCacheFreeOne(pPool, iUser);
4728}
4729
4730/**
4731 * Allocates a page from the pool.
4732 *
4733 * This page may actually be a cached page and not in need of any processing
4734 * on the caller's part.
4735 *
4736 * @returns VBox status code.
4737 * @retval VINF_SUCCESS if a NEW page was allocated.
4738 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4739 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4740 * @param pVM The VM handle.
4741 * @param GCPhys The GC physical address of the page we're gonna shadow.
4742 * For 4MB and 2MB PD entries, it's the first address the
4743 * shadow PT is covering.
4744 * @param enmKind The kind of mapping.
4745 * @param enmAccess Access type for the mapping (only relevant for big pages)
4746 * @param iUser The shadow page pool index of the user table.
4747 * @param iUserTable The index into the user table (shadowed).
4748 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4749 * @param fLockPage Lock the page
4750 */
4751int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4752{
4753 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4754 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4755 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4756 *ppPage = NULL;
4757 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4758 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4759 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4760
4761 pgmLock(pVM);
4762
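    /* Try the cache first; on a hit pgmPoolCacheAlloc returns the existing shadow
       page (typically as VINF_PGM_CACHED_PAGE) and we can skip the allocation,
       initialization and tracking insertion below. */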
4763 if (pPool->fCacheEnabled)
4764 {
4765 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4766 if (RT_SUCCESS(rc2))
4767 {
4768 if (fLockPage)
4769 pgmPoolLockPage(pPool, *ppPage);
4770 pgmUnlock(pVM);
4771 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4772 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4773 return rc2;
4774 }
4775 }
4776
4777 /*
4778 * Allocate a new one.
4779 */
4780 int rc = VINF_SUCCESS;
4781 uint16_t iNew = pPool->iFreeHead;
4782 if (iNew == NIL_PGMPOOL_IDX)
4783 {
4784 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4785 if (RT_FAILURE(rc))
4786 {
4787 pgmUnlock(pVM);
4788 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4789 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4790 return rc;
4791 }
4792 iNew = pPool->iFreeHead;
4793 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4794 }
4795
4796 /* unlink the free head */
4797 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4798 pPool->iFreeHead = pPage->iNext;
4799 pPage->iNext = NIL_PGMPOOL_IDX;
4800
4801 /*
4802 * Initialize it.
4803 */
4804 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4805 pPage->enmKind = enmKind;
4806 pPage->enmAccess = enmAccess;
4807 pPage->GCPhys = GCPhys;
4808 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4809 pPage->fMonitored = false;
4810 pPage->fCached = false;
4811#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4812 pPage->fDirty = false;
4813#endif
4814 pPage->fReusedFlushPending = false;
4815 pPage->cModifications = 0;
4816 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4817 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4818 pPage->cPresent = 0;
4819 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4820 pPage->pvLastAccessHandlerFault = 0;
4821 pPage->cLastAccessHandlerCount = 0;
4822 pPage->pvLastAccessHandlerRip = 0;
4823
4824 /*
4825 * Insert into the tracking and cache. If this fails, free the page.
4826 */
4827 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4828 if (RT_FAILURE(rc3))
4829 {
4830 pPool->cUsedPages--;
4831 pPage->enmKind = PGMPOOLKIND_FREE;
4832 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4833 pPage->GCPhys = NIL_RTGCPHYS;
4834 pPage->iNext = pPool->iFreeHead;
4835 pPool->iFreeHead = pPage->idx;
4836 pgmUnlock(pVM);
4837 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4838 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4839 return rc3;
4840 }
4841
4842 /*
4843 * Commit the allocation, clear the page and return.
4844 */
4845#ifdef VBOX_WITH_STATISTICS
4846 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4847 pPool->cUsedPagesHigh = pPool->cUsedPages;
4848#endif
4849
4850 if (!pPage->fZeroed)
4851 {
4852 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4853 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4854 ASMMemZeroPage(pv);
4855 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4856 }
4857
4858 *ppPage = pPage;
4859 if (fLockPage)
4860 pgmPoolLockPage(pPool, pPage);
4861 pgmUnlock(pVM);
4862 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4863 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4864 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4865 return rc;
4866}
4867
4868
4869/**
4870 * Frees a usage of a pool page.
4871 *
4872 * @param pVM The VM handle.
4873 * @param HCPhys The HC physical address of the shadow page.
4874 * @param iUser The shadow page pool index of the user table.
4875 * @param iUserTable The index into the user table (shadowed).
4876 */
4877void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4878{
4879 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4880 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4881 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4882}
4883
4884/**
4885 * Internal worker for finding an 'in-use' shadow page given its physical address.
4886 *
4887 * @returns Pointer to the shadow page structure.
4888 * @param pPool The pool.
4889 * @param HCPhys The HC physical address of the shadow page.
4890 */
4891PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4892{
4893 PVM pVM = pPool->CTX_SUFF(pVM);
4894
4895 Assert(PGMIsLockOwner(pVM));
4896
4897 /*
4898 * Look up the page.
4899 */
4900 pgmLock(pVM);
4901 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4902 pgmUnlock(pVM);
4903
4904 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4905 return pPage;
4906}
4907
4908#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4909/**
4910 * Flush the specified page if present
4911 *
4912 * @param pVM The VM handle.
4913 * @param GCPhys Guest physical address of the page to flush
4914 */
4915void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4916{
4917 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4918
4919 VM_ASSERT_EMT(pVM);
4920
4921 /*
4922 * Look up the GCPhys in the hash.
4923 */
4924 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4925 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4926 if (i == NIL_PGMPOOL_IDX)
4927 return;
4928
4929 do
4930 {
4931 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4932 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4933 {
4934 switch (pPage->enmKind)
4935 {
4936 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4937 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4938 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4939 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4940 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4941 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4942 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4943 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4944 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4945 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4946 case PGMPOOLKIND_64BIT_PML4:
4947 case PGMPOOLKIND_32BIT_PD:
4948 case PGMPOOLKIND_PAE_PDPT:
4949 {
4950 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4951#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4952 if (pPage->fDirty)
4953 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4954 else
4955#endif
4956 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4957 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4958 pgmPoolMonitorChainFlush(pPool, pPage);
4959 return;
4960 }
4961
4962 /* ignore, no monitoring. */
4963 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4964 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4965 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4966 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4967 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4968 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4969 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4970 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4971 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4972 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4973 case PGMPOOLKIND_ROOT_NESTED:
4974 case PGMPOOLKIND_PAE_PD_PHYS:
4975 case PGMPOOLKIND_PAE_PDPT_PHYS:
4976 case PGMPOOLKIND_32BIT_PD_PHYS:
4977 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4978 break;
4979
4980 default:
4981 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4982 }
4983 }
4984
4985 /* next */
4986 i = pPage->iNext;
4987 } while (i != NIL_PGMPOOL_IDX);
4988 return;
4989}
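/*
 * Editor's note, a hypothetical call sketch that is not part of the original file:
 * a ring-3 caller that has just changed the backing of a guest page could use this
 * to force out any monitored shadow table tracking it. GCPhysPage is an illustrative
 * variable, and bracketing the call with the PGM lock is an assumption here.
 *
 * @code
 *     pgmLock(pVM);
 *     pgmPoolFlushPageByGCPhys(pVM, GCPhysPage);
 *     pgmUnlock(pVM);
 * @endcode
 */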
4990#endif /* IN_RING3 */
4991
4992#ifdef IN_RING3
4993
4994
4995/**
4996 * Reset CPU on hot plugging.
4997 *
4998 * @param pVM The VM handle.
4999 * @param pVCpu The virtual CPU.
5000 */
5001void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5002{
5003 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5004
5005 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5006 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5007 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5008}
5009
5010
5011/**
5012 * Flushes the entire cache.
5013 *
5014 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5015 * this and will execute the CR3 flush.
5016 *
5017 * @param pVM The VM handle.
5018 */
5019void pgmR3PoolReset(PVM pVM)
5020{
5021 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5022
5023 Assert(PGMIsLockOwner(pVM));
5024 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5025 LogFlow(("pgmR3PoolReset:\n"));
5026
5027 /*
5028 * If there are no pages in the pool, there is nothing to do.
5029 */
5030 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5031 {
5032 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5033 return;
5034 }
5035
5036 /*
5037 * Exit the shadow mode since we're going to clear everything,
5038 * including the root page.
5039 */
5040 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5041 {
5042 PVMCPU pVCpu = &pVM->aCpus[i];
5043 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5044 }
5045
5046 /*
5047 * Nuke the free list and reinsert all pages into it.
5048 */
5049 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5050 {
5051 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5052
5053 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5054 if (pPage->fMonitored)
5055 pgmPoolMonitorFlush(pPool, pPage);
5056 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5057 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5058 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5059 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5060 pPage->cModifications = 0;
5061 pPage->GCPhys = NIL_RTGCPHYS;
5062 pPage->enmKind = PGMPOOLKIND_FREE;
5063 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5064 Assert(pPage->idx == i);
5065 pPage->iNext = i + 1;
5066 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5067 pPage->fSeenNonGlobal = false;
5068 pPage->fMonitored = false;
5069#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5070 pPage->fDirty = false;
5071#endif
5072 pPage->fCached = false;
5073 pPage->fReusedFlushPending = false;
5074 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5075 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5076 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5077 pPage->cLocked = 0;
5078 }
5079 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5080 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5081 pPool->cUsedPages = 0;
5082
5083 /*
5084 * Zap and reinitialize the user records.
5085 */
5086 pPool->cPresent = 0;
5087 pPool->iUserFreeHead = 0;
5088 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5089 const unsigned cMaxUsers = pPool->cMaxUsers;
5090 for (unsigned i = 0; i < cMaxUsers; i++)
5091 {
5092 paUsers[i].iNext = i + 1;
5093 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5094 paUsers[i].iUserTable = 0xfffffffe;
5095 }
5096 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5097
5098 /*
5099 * Clear all the GCPhys links and rebuild the phys ext free list.
5100 */
5101 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
5102 pRam;
5103 pRam = pRam->CTX_SUFF(pNext))
5104 {
5105 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5106 while (iPage-- > 0)
5107 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
5108 }
5109
5110 pPool->iPhysExtFreeHead = 0;
5111 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5112 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5113 for (unsigned i = 0; i < cMaxPhysExts; i++)
5114 {
5115 paPhysExts[i].iNext = i + 1;
5116 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5117 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5118 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5119 }
5120 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5121
5122 /*
5123 * Just zap the modified list.
5124 */
5125 pPool->cModifiedPages = 0;
5126 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5127
5128 /*
5129 * Clear the GCPhys hash and the age list.
5130 */
5131 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5132 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5133 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5134 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5135
5136#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5137 /* Clear all dirty pages. */
5138 pPool->idxFreeDirtyPage = 0;
5139 pPool->cDirtyPages = 0;
5140 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
5141 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
5142#endif
5143
5144 /*
5145 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5146 */
5147 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5148 {
5149 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5150 pPage->iNext = NIL_PGMPOOL_IDX;
5151 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5152 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5153 pPage->cModifications = 0;
5154 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5155 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5156 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5157 if (pPage->fMonitored)
5158 {
5159 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
5160 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5161 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5162 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5163 pPool->pszAccessHandler);
5164 AssertFatalRCSuccess(rc);
5165 pgmPoolHashInsert(pPool, pPage);
5166 }
5167 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5168 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5169 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5170 }
5171
5172 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5173 {
5174 /*
5175 * Re-enter the shadowing mode and assert Sync CR3 FF.
5176 */
5177 PVMCPU pVCpu = &pVM->aCpus[i];
5178 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5179 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5180 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5181 }
5182
5183 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5184}
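/*
 * Editor's note, a minimal sketch that is not in the original file: pgmR3PoolReset
 * asserts PGM lock ownership, so a hypothetical caller would bracket it like this
 * and then rely on the VMCPU_FF_PGM_SYNC_CR3 / VMCPU_FF_TLB_FLUSH forced actions
 * set above being serviced before guest execution resumes.
 *
 * @code
 *     pgmLock(pVM);
 *     pgmR3PoolReset(pVM);
 *     pgmUnlock(pVM);
 * @endcode
 */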
5185#endif /* IN_RING3 */
5186
5187#ifdef LOG_ENABLED
5188static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5189{
5190 switch(enmKind)
5191 {
5192 case PGMPOOLKIND_INVALID:
5193 return "PGMPOOLKIND_INVALID";
5194 case PGMPOOLKIND_FREE:
5195 return "PGMPOOLKIND_FREE";
5196 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5197 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5198 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5199 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5200 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5201 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5202 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5203 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5204 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5205 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5206 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5207 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5208 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5209 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5210 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5211 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5212 case PGMPOOLKIND_32BIT_PD:
5213 return "PGMPOOLKIND_32BIT_PD";
5214 case PGMPOOLKIND_32BIT_PD_PHYS:
5215 return "PGMPOOLKIND_32BIT_PD_PHYS";
5216 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5217 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5218 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5219 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5220 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5221 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5222 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5223 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5224 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5225 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5226 case PGMPOOLKIND_PAE_PD_PHYS:
5227 return "PGMPOOLKIND_PAE_PD_PHYS";
5228 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5229 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5230 case PGMPOOLKIND_PAE_PDPT:
5231 return "PGMPOOLKIND_PAE_PDPT";
5232 case PGMPOOLKIND_PAE_PDPT_PHYS:
5233 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5234 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5235 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5236 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5237 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5238 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5239 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5240 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5241 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5242 case PGMPOOLKIND_64BIT_PML4:
5243 return "PGMPOOLKIND_64BIT_PML4";
5244 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5245 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5246 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5247 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5248 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5249 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5250 case PGMPOOLKIND_ROOT_NESTED:
5251 return "PGMPOOLKIND_ROOT_NESTED";
5252 }
5253 return "Unknown kind!";
5254}
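/*
 * Editor's note, an illustrative logging sketch that is not part of the original
 * file; pPage is a hypothetical pool page pointer.
 *
 * @code
 *     Log(("pgmPoolFlushPage: pPage=%p idx=%d kind=%s\n",
 *          pPage, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind)));
 * @endcode
 */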
5255#endif /* LOG_ENABLED */