VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@31640

Last change on this file since 31640 was 31636, checked in by vboxsync, 15 years ago

EM: Addressed VBOXSTRICTRC todo from r64673. Decided to only use VBOXSTRICTRC on the methods that would actually return VINF_EM_* stuff.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 199.4 KB
1/* $Id: PGMAllPool.cpp 31636 2010-08-13 12:03:15Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/pgm.h>
24#include <VBox/mm.h>
25#include <VBox/em.h>
26#include <VBox/cpum.h>
27#ifdef IN_RC
28# include <VBox/patm.h>
29#endif
30#include "../PGMInternal.h"
31#include <VBox/vm.h>
32#include "../PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/hwacc_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Defined Constants And Macros *
45*******************************************************************************/
46/**
47 * Checks if a PAE PTE is actually present and not just invalid because
48 * of the MMIO optimization.
49 * @todo Move this to PGMInternal.h if necessary.
50 */
51#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
52# define PGM_POOL_IS_PAE_PTE_PRESENT(Pte) \
53 ( ((Pte).u & (X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == X86_PTE_P)
54#else
55# define PGM_POOL_IS_PAE_PTE_PRESENT(Pte) \
56 ( (Pte).n.u1Present )
57#endif
58
59/**
60 * Checks if an EPT PTE is actually present and not just invalid
61 * because of the MMIO optimization.
62 * @todo Move this to PGMInternal.h if necessary.
63 */
64#define PGM_POOL_IS_EPT_PTE_PRESENT(Pte) \
65 ( (Pte).n.u1Present )
66
67
68/*******************************************************************************
69* Internal Functions *
70*******************************************************************************/
71RT_C_DECLS_BEGIN
72static void pgmPoolFlushAllInt(PPGMPOOL pPool);
73DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
74DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
75static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
76static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
77static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
78#ifndef IN_RING3
79DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
80#endif
81#ifdef LOG_ENABLED
82static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
83#endif
84#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
85static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT);
86#endif
87
88int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
89PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
90void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
91void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
92
93RT_C_DECLS_END
94
95
96/**
97 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
98 *
99 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
100 * @param enmKind The page kind.
101 */
102DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
103{
104 switch (enmKind)
105 {
106 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
107 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
108 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
109 return true;
110 default:
111 return false;
112 }
113}
114
115
116/**
117 * Flushes a chain of pages sharing the same access monitor.
118 *
119 * @returns VBox status code suitable for scheduling.
120 * @param pPool The pool.
121 * @param pPage A page in the chain.
122 * @todo VBOXSTRICTRC
123 */
124int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
125{
126 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
127
128 /*
129 * Find the list head.
130 */
131 uint16_t idx = pPage->idx;
132 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
133 {
134 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
135 {
136 idx = pPage->iMonitoredPrev;
137 Assert(idx != pPage->idx);
138 pPage = &pPool->aPages[idx];
139 }
140 }
141
142 /*
143 * Iterate the list flushing each shadow page.
144 */
145 int rc = VINF_SUCCESS;
146 for (;;)
147 {
148 idx = pPage->iMonitoredNext;
149 Assert(idx != pPage->idx);
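        /* Indexes below PGMPOOL_IDX_FIRST are the pool's fixed root pages; only ordinary
           pool pages are flushed here. */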
150 if (pPage->idx >= PGMPOOL_IDX_FIRST)
151 {
152 int rc2 = pgmPoolFlushPage(pPool, pPage);
153 AssertRC(rc2);
154 }
155 /* next */
156 if (idx == NIL_PGMPOOL_IDX)
157 break;
158 pPage = &pPool->aPages[idx];
159 }
160 return rc;
161}
162
163
164/**
165 * Wrapper for getting the current context pointer to the entry being modified.
166 *
167 * @returns VBox status code suitable for scheduling.
168 * @param pVM VM Handle.
169 * @param pvDst Destination address
170 * @param pvSrc Source guest virtual address.
171 * @param GCPhysSrc The source guest physical address.
172 * @param cb Size of data to read
173 */
174DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
175{
176#if defined(IN_RING3)
177 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
178 return VINF_SUCCESS;
179#else
180 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
181 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
182#endif
183}
184
185/**
186 * Process shadow entries before they are changed by the guest.
187 *
188 * For PT entries we will clear them. For PD entries, we'll simply check
189 * for mapping conflicts and set the SyncCR3 FF if found.
190 *
191 * @param pVCpu VMCPU handle
192 * @param pPool The pool.
193 * @param pPage The head page.
194 * @param GCPhysFault The guest physical fault address.
195 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
196 * In R3 this is the host context 'fault' address.
197 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
198 */
199void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
200{
201 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
202 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
203 PVM pVM = pPool->CTX_SUFF(pVM);
204
205 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
206
207 for (;;)
208 {
209 union
210 {
211 void *pv;
212 PX86PT pPT;
213 PX86PTPAE pPTPae;
214 PX86PD pPD;
215 PX86PDPAE pPDPae;
216 PX86PDPT pPDPT;
217 PX86PML4 pPML4;
218 } uShw;
219
220 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
221
222 uShw.pv = NULL;
223 switch (pPage->enmKind)
224 {
225 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
226 {
227 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
228 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
229 const unsigned iShw = off / sizeof(X86PTE);
230 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
231 if (uShw.pPT->a[iShw].n.u1Present)
232 {
233 X86PTE GstPte;
234
235 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
236 AssertRC(rc);
237 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
238 pgmPoolTracDerefGCPhysHint(pPool, pPage,
239 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
240 GstPte.u & X86_PTE_PG_MASK,
241 iShw);
242 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
243 }
244 break;
245 }
246
247 /* page/2 sized */
248 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
249 {
250 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
251 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
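            /* A PAE PT shadows only half (2MB) of a 4MB 32-bit guest PT; the XOR with
               pPage->GCPhys checks whether the write landed in the half this shadow page
               mirrors before reacting to it. */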
252 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
253 {
254 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
255 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
256 if (PGM_POOL_IS_PAE_PTE_PRESENT(uShw.pPTPae->a[iShw]))
257 {
258 X86PTE GstPte;
259 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
260 AssertRC(rc);
261
262 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
263 pgmPoolTracDerefGCPhysHint(pPool, pPage,
264 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
265 GstPte.u & X86_PTE_PG_MASK,
266 iShw);
267 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
268 }
269 }
270 break;
271 }
272
273 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
274 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
275 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
276 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
277 {
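            /* A 32-bit guest PD (1024 entries of 4MB each) is shadowed by four PAE PDs
               (kinds PD0..PD3): each shadow PD covers 256 guest PDEs, and every guest PDE
               expands into a pair of 2MB PAE PDEs, hence iShw = (iGst % 256) * 2. */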
278 unsigned iGst = off / sizeof(X86PDE);
279 unsigned iShwPdpt = iGst / 256;
280 unsigned iShw = (iGst % 256) * 2;
281 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
282
283 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
284 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
285 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
286 {
287 for (unsigned i = 0; i < 2; i++)
288 {
289# ifndef IN_RING0
290 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
291 {
292 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
293 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
294 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
295 break;
296 }
297 else
298# endif /* !IN_RING0 */
299 if (uShw.pPDPae->a[iShw+i].n.u1Present)
300 {
301 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
302 pgmPoolFree(pVM,
303 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
304 pPage->idx,
305 iShw + i);
306 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
307 }
308
309 /* paranoia / a bit assumptive. */
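                    /* If the write is misaligned and spills over into the next 4-byte guest
                       PDE, process the following pair of shadow entries as well. */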
310 if ( (off & 3)
311 && (off & 3) + cbWrite > 4)
312 {
313 const unsigned iShw2 = iShw + 2 + i;
314 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
315 {
316# ifndef IN_RING0
317 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
318 {
319 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
320 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
321 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
322 break;
323 }
324 else
325# endif /* !IN_RING0 */
326 if (uShw.pPDPae->a[iShw2].n.u1Present)
327 {
328 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
329 pgmPoolFree(pVM,
330 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
331 pPage->idx,
332 iShw2);
333 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
334 }
335 }
336 }
337 }
338 }
339 break;
340 }
341
342 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
343 {
344 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
345 const unsigned iShw = off / sizeof(X86PTEPAE);
346 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
347 if (PGM_POOL_IS_PAE_PTE_PRESENT(uShw.pPTPae->a[iShw]))
348 {
349 X86PTEPAE GstPte;
350 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
351 AssertRC(rc);
352
353 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
354 pgmPoolTracDerefGCPhysHint(pPool, pPage,
355 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
356 GstPte.u & X86_PTE_PAE_PG_MASK,
357 iShw);
358 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
359 }
360
361 /* paranoia / a bit assumptive. */
362 if ( (off & 7)
363 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
364 {
365 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
366 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
367
368 if (PGM_POOL_IS_PAE_PTE_PRESENT(uShw.pPTPae->a[iShw2]))
369 {
370 X86PTEPAE GstPte;
371# ifdef IN_RING3
372 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
373# else
374 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
375# endif
376 AssertRC(rc);
377 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
378 pgmPoolTracDerefGCPhysHint(pPool, pPage,
379 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
380 GstPte.u & X86_PTE_PAE_PG_MASK,
381 iShw2);
382 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u ,0);
383 }
384 }
385 break;
386 }
387
388 case PGMPOOLKIND_32BIT_PD:
389 {
390 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
391 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
392
393 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
394 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
395# ifndef IN_RING0
396 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
397 {
398 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
399 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
400 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
401 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
402 break;
403 }
404# endif /* !IN_RING0 */
405# ifndef IN_RING0
406 else
407# endif /* !IN_RING0 */
408 {
409 if (uShw.pPD->a[iShw].n.u1Present)
410 {
411 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
412 pgmPoolFree(pVM,
413 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
414 pPage->idx,
415 iShw);
416 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
417 }
418 }
419 /* paranoia / a bit assumptive. */
420 if ( (off & 3)
421 && (off & 3) + cbWrite > sizeof(X86PTE))
422 {
423 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
424 if ( iShw2 != iShw
425 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
426 {
427# ifndef IN_RING0
428 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
429 {
430 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
431 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
432 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
433 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
434 break;
435 }
436# endif /* !IN_RING0 */
437# ifndef IN_RING0
438 else
439# endif /* !IN_RING0 */
440 {
441 if (uShw.pPD->a[iShw2].n.u1Present)
442 {
443 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
444 pgmPoolFree(pVM,
445 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
446 pPage->idx,
447 iShw2);
448 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
449 }
450 }
451 }
452 }
453#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
454 if ( uShw.pPD->a[iShw].n.u1Present
455 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
456 {
457 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
458# ifdef IN_RC /* TLB load - we're pushing things a bit... */
459 ASMProbeReadByte(pvAddress);
460# endif
461 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
462 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
463 }
464#endif
465 break;
466 }
467
468 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
469 {
470 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
471 const unsigned iShw = off / sizeof(X86PDEPAE);
472 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
473#ifndef IN_RING0
474 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
475 {
476 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
477 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
478 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
479 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
480 break;
481 }
482#endif /* !IN_RING0 */
483 /*
484 * Causes trouble when the guest uses a PDE to refer to the whole page table level
485 * structure. (Invalidate here; faults later on when it tries to change the page
486 * table entries -> recheck; probably only applies to the RC case.)
487 */
488# ifndef IN_RING0
489 else
490# endif /* !IN_RING0 */
491 {
492 if (uShw.pPDPae->a[iShw].n.u1Present)
493 {
494 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
495 pgmPoolFree(pVM,
496 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
497 pPage->idx,
498 iShw);
499 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
500 }
501 }
502 /* paranoia / a bit assumptive. */
503 if ( (off & 7)
504 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
505 {
506 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
507 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
508
509#ifndef IN_RING0
510 if ( iShw2 != iShw
511 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
512 {
513 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
514 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
515 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
516 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
517 break;
518 }
519#endif /* !IN_RING0 */
520# ifndef IN_RING0
521 else
522# endif /* !IN_RING0 */
523 if (uShw.pPDPae->a[iShw2].n.u1Present)
524 {
525 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
526 pgmPoolFree(pVM,
527 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
528 pPage->idx,
529 iShw2);
530 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
531 }
532 }
533 break;
534 }
535
536 case PGMPOOLKIND_PAE_PDPT:
537 {
538 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
539 /*
540 * Hopefully this doesn't happen very often:
541 * - touching unused parts of the page
542 * - messing with the bits of pd pointers without changing the physical address
543 */
544 /* PDPT roots are not page aligned; 32 byte only! */
545 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
546
547 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
548 const unsigned iShw = offPdpt / sizeof(X86PDPE);
549 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
550 {
551# ifndef IN_RING0
552 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
553 {
554 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
555 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
556 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
557 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
558 break;
559 }
560# endif /* !IN_RING0 */
561# ifndef IN_RING0
562 else
563# endif /* !IN_RING0 */
564 if (uShw.pPDPT->a[iShw].n.u1Present)
565 {
566 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
567 pgmPoolFree(pVM,
568 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
569 pPage->idx,
570 iShw);
571 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
572 }
573
574 /* paranoia / a bit assumptive. */
575 if ( (offPdpt & 7)
576 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
577 {
578 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
579 if ( iShw2 != iShw
580 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
581 {
582# ifndef IN_RING0
583 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
584 {
585 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
586 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
587 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
588 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
589 break;
590 }
591# endif /* !IN_RING0 */
592# ifndef IN_RING0
593 else
594# endif /* !IN_RING0 */
595 if (uShw.pPDPT->a[iShw2].n.u1Present)
596 {
597 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
598 pgmPoolFree(pVM,
599 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
600 pPage->idx,
601 iShw2);
602 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
603 }
604 }
605 }
606 }
607 break;
608 }
609
610#ifndef IN_RC
611 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
612 {
613 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
614 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
615 const unsigned iShw = off / sizeof(X86PDEPAE);
616 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
617 if (uShw.pPDPae->a[iShw].n.u1Present)
618 {
619 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
620 pgmPoolFree(pVM,
621 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
622 pPage->idx,
623 iShw);
624 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
625 }
626 /* paranoia / a bit assumptive. */
627 if ( (off & 7)
628 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
629 {
630 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
631 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
632
633 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
634 if (uShw.pPDPae->a[iShw2].n.u1Present)
635 {
636 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
637 pgmPoolFree(pVM,
638 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
639 pPage->idx,
640 iShw2);
641 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
642 }
643 }
644 break;
645 }
646
647 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
648 {
649 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
650 /*
651 * Hopefully this doesn't happen very often:
652 * - messing with the bits of pd pointers without changing the physical address
653 */
654 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
655 const unsigned iShw = off / sizeof(X86PDPE);
656 if (uShw.pPDPT->a[iShw].n.u1Present)
657 {
658 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
659 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
660 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
661 }
662 /* paranoia / a bit assumptive. */
663 if ( (off & 7)
664 && (off & 7) + cbWrite > sizeof(X86PDPE))
665 {
666 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
667 if (uShw.pPDPT->a[iShw2].n.u1Present)
668 {
669 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
670 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
671 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
672 }
673 }
674 break;
675 }
676
677 case PGMPOOLKIND_64BIT_PML4:
678 {
679 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
680 /*
681 * Hopefully this doesn't happen very often:
682 * - messing with the bits of pd pointers without changing the physical address
683 */
684 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
685 const unsigned iShw = off / sizeof(X86PDPE);
686 if (uShw.pPML4->a[iShw].n.u1Present)
687 {
688 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
689 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
690 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
691 }
692 /* paranoia / a bit assumptive. */
693 if ( (off & 7)
694 && (off & 7) + cbWrite > sizeof(X86PDPE))
695 {
696 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
697 if (uShw.pPML4->a[iShw2].n.u1Present)
698 {
699 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
700 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
701 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
702 }
703 }
704 break;
705 }
706#endif /* !IN_RC */
707
708 default:
709 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
710 }
711 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
712
713 /* next */
714 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
715 return;
716 pPage = &pPool->aPages[pPage->iMonitoredNext];
717 }
718}
719
720# ifndef IN_RING3
721/**
722 * Checks if an access could be a fork operation in progress.
723 *
724 * Meaning that the guest is setting up the parent process for Copy-On-Write.
725 *
726 * @returns true if it's likely that we're forking, otherwise false.
727 * @param pPool The pool.
728 * @param pDis The disassembled instruction.
729 * @param offFault The access offset.
730 */
731DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
732{
733 /*
734 * i386 linux is using btr to clear X86_PTE_RW.
735 * The functions involved are (2.6.16 source inspection):
736 * clear_bit
737 * ptep_set_wrprotect
738 * copy_one_pte
739 * copy_pte_range
740 * copy_pmd_range
741 * copy_pud_range
742 * copy_page_range
743 * dup_mmap
744 * dup_mm
745 * copy_mm
746 * copy_process
747 * do_fork
748 */
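    /* The R/W bit lives in the low dword of the (PAE) PTE, so a BTR faulting on the high
       dword (offset & 4) is unlikely to be the write-protect pattern we are looking for. */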
749 if ( pDis->pCurInstr->opcode == OP_BTR
750 && !(offFault & 4)
751 /** @todo Validate that the bit index is X86_PTE_RW. */
752 )
753 {
754 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
755 return true;
756 }
757 return false;
758}
759
760
761/**
762 * Determine whether the page is likely to have been reused.
763 *
764 * @returns true if we consider the page as being reused for a different purpose.
765 * @returns false if we consider it to still be a paging page.
766 * @param pVM VM Handle.
767 * @param pVCpu VMCPU Handle.
768 * @param pRegFrame Trap register frame.
769 * @param pDis The disassembly info for the faulting instruction.
770 * @param pvFault The fault address.
771 *
772 * @remark The REP prefix check is left to the caller because of STOSD/W.
773 */
774DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
775{
776#ifndef IN_RC
777 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
778 if ( HWACCMHasPendingIrq(pVM)
779 && (pRegFrame->rsp - pvFault) < 32)
780 {
781 /* Fault caused by stack writes while trying to inject an interrupt event. */
782 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
783 return true;
784 }
785#else
786 NOREF(pVM); NOREF(pvFault);
787#endif
788
789 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
790
791 /* Non-supervisor mode write means it's used for something else. */
792 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
793 return true;
794
795 switch (pDis->pCurInstr->opcode)
796 {
797 /* call implies the actual push of the return address faulted */
798 case OP_CALL:
799 Log4(("pgmPoolMonitorIsReused: CALL\n"));
800 return true;
801 case OP_PUSH:
802 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
803 return true;
804 case OP_PUSHF:
805 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
806 return true;
807 case OP_PUSHA:
808 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
809 return true;
810 case OP_FXSAVE:
811 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
812 return true;
813 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
814 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
815 return true;
816 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
817 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
818 return true;
819 case OP_MOVSWD:
820 case OP_STOSWD:
821 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
822 && pRegFrame->rcx >= 0x40
823 )
824 {
825 Assert(pDis->mode == CPUMODE_64BIT);
826
827 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
828 return true;
829 }
830 return false;
831 }
832 if ( ( (pDis->param1.flags & USE_REG_GEN32)
833 || (pDis->param1.flags & USE_REG_GEN64))
834 && (pDis->param1.base.reg_gen == USE_REG_ESP))
835 {
836 Log4(("pgmPoolMonitorIsReused: ESP\n"));
837 return true;
838 }
839
840 return false;
841}
842
843/**
844 * Flushes the page being accessed.
845 *
846 * @returns VBox status code suitable for scheduling.
847 * @param pVM The VM handle.
848 * @param pVCpu The VMCPU handle.
849 * @param pPool The pool.
850 * @param pPage The pool page (head).
851 * @param pDis The disassembly of the write instruction.
852 * @param pRegFrame The trap register frame.
853 * @param GCPhysFault The fault address as guest physical address.
854 * @param pvFault The fault address.
855 * @todo VBOXSTRICTRC
856 */
857static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
858 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
859{
860 /*
861 * First, do the flushing.
862 */
863 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
864
865 /*
866 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
867 * Must do this in raw mode (!); XP boot will fail otherwise.
868 */
869 uint32_t cbWritten;
870 VBOXSTRICTRC rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL, &cbWritten);
871 if (RT_SUCCESS(rc2))
872 {
873 pRegFrame->rip += pDis->opsize;
874 AssertMsg(rc2 == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
875 }
876 else if (rc2 == VERR_EM_INTERPRETER)
877 {
878#ifdef IN_RC
879 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
880 {
881 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
882 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
883 rc = VINF_SUCCESS;
884 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
885 }
886 else
887#endif
888 {
889 rc = VINF_EM_RAW_EMULATE_INSTR;
890 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
891 }
892 }
893 else
894 rc = VBOXSTRICTRC_VAL(rc2);
895
896 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
897 return rc;
898}
899
900/**
901 * Handles the STOSD write accesses.
902 *
903 * @returns VBox status code suitable for scheduling.
904 * @param pVM The VM handle.
905 * @param pPool The pool.
906 * @param pPage The pool page (head).
907 * @param pDis The disassembly of the write instruction.
908 * @param pRegFrame The trap register frame.
909 * @param GCPhysFault The fault address as guest physical address.
910 * @param pvFault The fault address.
911 */
912DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
913 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
914{
915 unsigned uIncrement = pDis->param1.size;
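    /* Each REP STOSD/STOSQ iteration advances by the operand size: 4 bytes in 32-bit
       operand mode, 8 bytes with REX.W in 64-bit mode (see the strict asserts below). */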
916
917 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
918 Assert(pRegFrame->rcx <= 0x20);
919
920#ifdef VBOX_STRICT
921 if (pDis->opmode == CPUMODE_32BIT)
922 Assert(uIncrement == 4);
923 else
924 Assert(uIncrement == 8);
925#endif
926
927 Log3(("pgmPoolAccessHandlerSTOSD\n"));
928
929 /*
930 * Increment the modification counter and insert it into the list
931 * of modified pages the first time.
932 */
933 if (!pPage->cModifications++)
934 pgmPoolMonitorModifiedInsert(pPool, pPage);
935
936 /*
937 * Execute REP STOSD.
938 *
939 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
940 * write situation, meaning that it's safe to write here.
941 */
942 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
943 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
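    /* Emulate the REP STOS loop ourselves: for each element let the monitor chain react to
       the change, perform the store on the guest page, then advance RDI/RCX as the CPU would. */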
944 while (pRegFrame->rcx)
945 {
946#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
947 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
948 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
949 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
950#else
951 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
952#endif
953#ifdef IN_RC
954 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
955#else
956 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
957#endif
958 pu32 += uIncrement;
959 GCPhysFault += uIncrement;
960 pRegFrame->rdi += uIncrement;
961 pRegFrame->rcx--;
962 }
963 pRegFrame->rip += pDis->opsize;
964
965 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
966 return VINF_SUCCESS;
967}
968
969
970/**
971 * Handles the simple write accesses.
972 *
973 * @returns VBox status code suitable for scheduling.
974 * @param pVM The VM handle.
975 * @param pVCpu The VMCPU handle.
976 * @param pPool The pool.
977 * @param pPage The pool page (head).
978 * @param pDis The disassembly of the write instruction.
979 * @param pRegFrame The trap register frame.
980 * @param GCPhysFault The fault address as guest physical address.
981 * @param pvFault The fault address.
982 * @param pfReused Reused state (out)
983 */
984DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
985 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
986{
987 Log3(("pgmPoolAccessHandlerSimple\n"));
988 /*
989 * Increment the modification counter and insert it into the list
990 * of modified pages the first time.
991 */
992 if (!pPage->cModifications++)
993 pgmPoolMonitorModifiedInsert(pPool, pPage);
994
995 /*
996 * Clear the affected shadow entries in the whole monitored chain. ASSUMES that pvFault is readable.
997 */
998#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
999 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1000 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1001 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1002#else
1003 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1004#endif
1005
1006 /*
1007 * Interpret the instruction.
1008 */
1009 uint32_t cb;
1010 VBOXSTRICTRC rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL, &cb);
1011 if (RT_SUCCESS(rc))
1012 {
1013 pRegFrame->rip += pDis->opsize;
1014 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1015 }
1016 else if (rc == VERR_EM_INTERPRETER)
1017 {
1018 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1019 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1020 rc = VINF_EM_RAW_EMULATE_INSTR;
1021 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1022 }
1023
1024#if 0 /* experimental code */
1025 if (rc == VINF_SUCCESS)
1026 {
1027 switch (pPage->enmKind)
1028 {
1029 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1030 {
1031 X86PTEPAE GstPte;
1032 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1033 AssertRC(rc);
1034
1035 /* Check the new value written by the guest. If present and with a bogus physical address, then
1036 * it's fairly safe to assume the guest is reusing the PT.
1037 */
1038 if (PGM_POOL_IS_PAE_PTE_PRESENT(GstPte))
1039 {
1040 RTHCPHYS HCPhys = -1;
1041 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1042 if (rc != VINF_SUCCESS)
1043 {
1044 *pfReused = true;
1045 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1046 }
1047 }
1048 break;
1049 }
1050 }
1051 }
1052#endif
1053
1054 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", VBOXSTRICTRC_VAL(rc), cb));
1055 return VBOXSTRICTRC_VAL(rc);
1056}
1057
1058/**
1059 * \#PF Handler callback for PT write accesses.
1060 *
1061 * @returns VBox status code (appropriate for GC return).
1062 * @param pVM VM Handle.
1063 * @param uErrorCode CPU Error code.
1064 * @param pRegFrame Trap register frame.
1065 * NULL on DMA and other non CPU access.
1066 * @param pvFault The fault address (cr2).
1067 * @param GCPhysFault The GC physical address corresponding to pvFault.
1068 * @param pvUser User argument.
1069 */
1070DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1071{
1072 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1073 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1074 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1075 PVMCPU pVCpu = VMMGetCpu(pVM);
1076 unsigned cMaxModifications;
1077 bool fForcedFlush = false;
1078
1079 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1080
1081 pgmLock(pVM);
1082 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1083 {
1084 /* Pool page changed while we were waiting for the lock; ignore. */
1085 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1086 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1087 pgmUnlock(pVM);
1088 return VINF_SUCCESS;
1089 }
1090#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1091 if (pPage->fDirty)
1092 {
1093 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1094 pgmUnlock(pVM);
1095 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1096 }
1097#endif
1098
1099#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1100 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1101 {
1102 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1103 void *pvGst;
1104 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1105 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1106 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1107 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1108 }
1109#endif
1110
1111 /*
1112 * Disassemble the faulting instruction.
1113 */
1114 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1115 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1116 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1117 {
1118 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1119 pgmUnlock(pVM);
1120 return rc;
1121 }
1122
1123 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1124
1125 /*
1126 * We should ALWAYS have the list head as user parameter. This
1127 * is because we use that page to record the changes.
1128 */
1129 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1130
1131#ifdef IN_RING0
1132 /* Maximum nr of modifications depends on the page type. */
1133 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1134 cMaxModifications = 4;
1135 else
1136 cMaxModifications = 24;
1137#else
1138 cMaxModifications = 48;
1139#endif
1140
1141 /*
1142 * Incremental page table updates should weigh more than random ones.
1143 * (Only applies when started from offset 0)
1144 */
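    /* Heuristic: a fault at the entry right after the previous one, from code near the last
       faulting RIP, with no other pool access in between, suggests the guest is rewriting the
       whole table; age the page faster by doubling cModifications. */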
1145 pVCpu->pgm.s.cPoolAccessHandler++;
1146 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1147 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1148 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1149 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1150 {
1151 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1152 Assert(pPage->cModifications < 32000);
1153 pPage->cModifications = pPage->cModifications * 2;
1154 pPage->pvLastAccessHandlerFault = pvFault;
1155 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1156 if (pPage->cModifications >= cMaxModifications)
1157 {
1158 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1159 fForcedFlush = true;
1160 }
1161 }
1162
1163 if (pPage->cModifications >= cMaxModifications)
1164 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1165
1166 /*
1167 * Check if it's worth dealing with.
1168 */
1169 bool fReused = false;
1170 bool fNotReusedNotForking = false;
1171 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1172 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1173 )
1174 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1175 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1176 {
1177 /*
1178 * Simple instructions, no REP prefix.
1179 */
1180 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1181 {
1182 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1183 if (fReused)
1184 goto flushPage;
1185
1186 /* A mov instruction to change the first page table entry will be remembered so we can detect
1187 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1188 */
1189 if ( rc == VINF_SUCCESS
1190 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1191 && pDis->pCurInstr->opcode == OP_MOV
1192 && (pvFault & PAGE_OFFSET_MASK) == 0)
1193 {
1194 pPage->pvLastAccessHandlerFault = pvFault;
1195 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1196 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1197 /* Make sure we don't kick out a page too quickly. */
1198 if (pPage->cModifications > 8)
1199 pPage->cModifications = 2;
1200 }
1201 else
1202 if (pPage->pvLastAccessHandlerFault == pvFault)
1203 {
1204 /* ignore the 2nd write to this page table entry. */
1205 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1206 }
1207 else
1208 {
1209 pPage->pvLastAccessHandlerFault = 0;
1210 pPage->pvLastAccessHandlerRip = 0;
1211 }
1212
1213 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1214 pgmUnlock(pVM);
1215 return rc;
1216 }
1217
1218 /*
1219 * Windows is frequently doing small memset() operations (netio test 4k+).
1220 * We have to deal with these or we'll kill the cache and performance.
1221 */
1222 if ( pDis->pCurInstr->opcode == OP_STOSWD
1223 && !pRegFrame->eflags.Bits.u1DF
1224 && pDis->opmode == pDis->mode
1225 && pDis->addrmode == pDis->mode)
1226 {
1227 bool fValidStosd = false;
1228
1229 if ( pDis->mode == CPUMODE_32BIT
1230 && pDis->prefix == PREFIX_REP
1231 && pRegFrame->ecx <= 0x20
1232 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1233 && !((uintptr_t)pvFault & 3)
1234 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1235 )
1236 {
1237 fValidStosd = true;
1238 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1239 }
1240 else
1241 if ( pDis->mode == CPUMODE_64BIT
1242 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1243 && pRegFrame->rcx <= 0x20
1244 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1245 && !((uintptr_t)pvFault & 7)
1246 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1247 )
1248 {
1249 fValidStosd = true;
1250 }
1251
1252 if (fValidStosd)
1253 {
1254 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1255 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1256 pgmUnlock(pVM);
1257 return rc;
1258 }
1259 }
1260
1261 /* REP prefix, don't bother. */
1262 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1263 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1264 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1265 fNotReusedNotForking = true;
1266 }
1267
1268#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1269 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1270 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1271 */
1272 if ( pPage->cModifications >= cMaxModifications
1273 && !fForcedFlush
1274 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1275 && ( fNotReusedNotForking
1276 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1277 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1278 )
1279 )
1280 {
1281 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1282 Assert(pPage->fDirty == false);
1283
1284 /* Flush any monitored duplicates as we will disable write protection. */
1285 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1286 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1287 {
1288 PPGMPOOLPAGE pPageHead = pPage;
1289
1290 /* Find the monitor head. */
1291 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1292 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1293
1294 while (pPageHead)
1295 {
1296 unsigned idxNext = pPageHead->iMonitoredNext;
1297
1298 if (pPageHead != pPage)
1299 {
1300 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1301 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1302 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1303 AssertRC(rc2);
1304 }
1305
1306 if (idxNext == NIL_PGMPOOL_IDX)
1307 break;
1308
1309 pPageHead = &pPool->aPages[idxNext];
1310 }
1311 }
1312
1313 /* The flushing above might fail for locked pages, so double check. */
1314 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1315 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1316 {
1317 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1318
1319 /* Temporarily allow write access to the page table again. */
1320 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1321 if (rc == VINF_SUCCESS)
1322 {
1323 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1324 AssertMsg(rc == VINF_SUCCESS
1325 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1326 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1327 || rc == VERR_PAGE_NOT_PRESENT,
1328 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1329
1330 pPage->pvDirtyFault = pvFault;
1331
1332 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1333 pgmUnlock(pVM);
1334 return rc;
1335 }
1336 }
1337 }
1338#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1339
1340 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1341flushPage:
1342 /*
1343 * Not worth it, so flush it.
1344 *
1345 * If we considered it to be reused, don't go back to ring-3
1346 * to emulate failed instructions since we usually cannot
1347 * interpret them. This may be a bit risky, in which case
1348 * the reuse detection must be fixed.
1349 */
1350 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1351 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1352 && fReused)
1353 {
1354 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1355 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1356 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1357 }
1358 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1359 pgmUnlock(pVM);
1360 return rc;
1361}
1362
1363# endif /* !IN_RING3 */
1364
1365# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1366
1367# ifdef VBOX_STRICT
1368/**
1369 * Check references to guest physical memory in a PAE / PAE page table.
1370 *
1371 * @param pPool The pool.
1372 * @param pPage The page.
1373 * @param pShwPT The shadow page table (mapping of the page).
1374 * @param pGstPT The guest page table.
1375 */
1376static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1377{
1378 unsigned cErrors = 0;
1379 int LastRc = -1; /* initialized to shut up gcc */
1380 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1381 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1382 PVM pVM = pPool->CTX_SUFF(pVM);
1383
1384#ifdef VBOX_STRICT
1385 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1386 AssertMsg(!PGM_POOL_IS_PAE_PTE_PRESENT(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1387#endif
1388 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1389 {
1390 if (PGM_POOL_IS_PAE_PTE_PRESENT(pShwPT->a[i]))
1391 {
1392 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1393 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1394 if ( rc != VINF_SUCCESS
1395 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1396 {
1397 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1398 LastPTE = i;
1399 LastRc = rc;
1400 LastHCPhys = HCPhys;
1401 cErrors++;
1402
1403 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1404 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1405 AssertRC(rc);
1406
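                /* Diagnostic aid: scan the pool for other shadow PAE PTs that still map this
                   guest page table writable, which would explain how it got out of sync. */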
1407 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1408 {
1409 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1410
1411 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1412 {
1413 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1414
1415 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1416 {
1417 if ( PGM_POOL_IS_PAE_PTE_PRESENT(pShwPT2->a[j])
1418 && pShwPT2->a[j].n.u1Write
1419 && (pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT)
1420 {
1421 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1422 }
1423 }
1424
1425 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1426 }
1427 }
1428 }
1429 }
1430 }
1431 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, pShwPT->a[LastPTE].u, LastHCPhys));
1432}
1433# endif /* VBOX_STRICT */
1434
1435/**
1436 * Clear references to guest physical memory in a PAE / PAE page table.
1437 *
1438 * @returns nr of changed PTEs
1439 * @param pPool The pool.
1440 * @param pPage The page.
1441 * @param pShwPT The shadow page table (mapping of the page).
1442 * @param pGstPT The guest page table.
1443 * @param pOldGstPT The old cached guest page table.
1444 * @param fAllowRemoval Whether to bail out and flag a flush as soon as a guest PTE with a bogus physical address is found (PT likely reused).
1445 * @param pfFlush Flush reused page table (out)
1446 */
1447DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT,
1448 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1449{
1450 unsigned cChanged = 0;
1451
1452#ifdef VBOX_STRICT
1453 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1454 AssertMsg(!PGM_POOL_IS_PAE_PTE_PRESENT(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1455#endif
1456 *pfFlush = false;
1457
1458 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1459 {
1460 /* Check the new value written by the guest. If present and with a bogus physical address, then
1461 * it's fairly safe to assume the guest is reusing the PT.
1462 */
1463 if ( fAllowRemoval
1464 && pGstPT->a[i].n.u1Present)
1465 {
1466 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1467 {
1468 *pfFlush = true;
1469 return ++cChanged;
1470 }
1471 }
1472 if (PGM_POOL_IS_PAE_PTE_PRESENT(pShwPT->a[i]))
1473 {
1474 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1475 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1476 {
1477#ifdef VBOX_STRICT
1478 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1479 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1480 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1481#endif
1482 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1483 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1484 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1485 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1486
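                /* The shadow PTE may legitimately hold fewer rights than the guest PTE (e.g.
                   write-protected for monitoring or dirty tracking), so only count the entry
                   as changed when other attributes differ or the shadow is writable while the
                   guest entry is not. */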
1487 if ( uHostAttr == uGuestAttr
1488 && fHostRW <= fGuestRW)
1489 continue;
1490 }
1491 cChanged++;
1492 /* Something was changed, so flush it. */
1493 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1494 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1495 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1496 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1497 }
1498 }
1499 return cChanged;
1500}
1501
1502
1503/**
1504 * Flush a dirty page
1505 *
1506 * @param pVM VM Handle.
1507 * @param pPool The pool.
1508 * @param idxSlot Dirty array slot index
1509 * @param fAllowRemoval Allow a reused page table to be removed
1510 */
1511static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1512{
1513 PPGMPOOLPAGE pPage;
1514 unsigned idxPage;
1515
1516 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1517 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1518 return;
1519
1520 idxPage = pPool->aIdxDirtyPages[idxSlot];
1521 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1522 pPage = &pPool->aPages[idxPage];
1523 Assert(pPage->idx == idxPage);
1524 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1525
1526 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1527 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1528
1529 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1530 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1531 Assert(rc == VINF_SUCCESS);
1532 pPage->fDirty = false;
1533
1534#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1535 PVMCPU pVCpu = VMMGetCpu(pVM);
1536 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1537#endif
1538
1539#ifdef VBOX_STRICT
1540 uint64_t fFlags = 0;
1541 RTHCPHYS HCPhys;
1542 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1543 AssertMsg( ( rc == VINF_SUCCESS
1544 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1545 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1546 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1547 || rc == VERR_PAGE_NOT_PRESENT,
1548 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1549#endif
1550
1551 /* Flush those PTEs that have changed. */
1552 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1553 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1554 void *pvGst;
1555 rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1556 bool fFlush;
1557 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst,
1558 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0], fAllowRemoval, &fFlush);
1559 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1560 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1561 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1562 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1563
1564 /* This page is likely to be modified again, so reduce the number of modifications just a bit here. */
1565 Assert(pPage->cModifications);
1566 if (cChanges < 4)
1567 pPage->cModifications = 1; /* must use > 0 here */
1568 else
1569 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1570
1571 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
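    /* If the dirty page array was full, the slot we are freeing becomes the next free slot. */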
1572 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1573 pPool->idxFreeDirtyPage = idxSlot;
1574
1575 pPool->cDirtyPages--;
1576 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1577 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1578 if (fFlush)
1579 {
1580 Assert(fAllowRemoval);
1581 Log(("Flush reused page table!\n"));
1582 pgmPoolFlushPage(pPool, pPage);
1583 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1584 }
1585 else
1586 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1587
1588#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1589 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1590#endif
1591}
1592
1593# ifndef IN_RING3
1594/**
1595 * Adds a new dirty page.
1596 *
1597 * @param pVM VM Handle.
1598 * @param pPool The pool.
1599 * @param pPage The page.
1600 */
1601void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1602{
1603 unsigned idxFree;
1604
1605 Assert(PGMIsLocked(pVM));
1606 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1607 Assert(!pPage->fDirty);
1608
1609 idxFree = pPool->idxFreeDirtyPage;
1610 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1611 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1612
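    /* If all dirty slots are in use, flush the entry occupying the free-slot hint to make room (a reused page table may be removed entirely). */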
1613 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1614 {
1615 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1616 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1617 }
1618 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1619 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1620
1621 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1622
1623 /*
1624 * Make a copy of the guest page table as we require valid GCPhys addresses
1625 * when removing references to physical pages.
1626 * (The HCPhys linear lookup is *extremely* expensive!)
1627 */
1628 void *pvGst;
1629 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1630 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1631#ifdef VBOX_STRICT
1632 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1633 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1634 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1635#endif
1636 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1637
1638 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1639 pPage->fDirty = true;
1640 pPage->idxDirty = idxFree;
1641 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1642 pPool->cDirtyPages++;
1643
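    /* Advance the free-slot hint; if that slot is still occupied, scan the ring for an unused one. */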
1644 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1645 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1646 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1647 {
1648 unsigned i;
1649 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1650 {
1651 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1652 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1653 {
1654 pPool->idxFreeDirtyPage = idxFree;
1655 break;
1656 }
1657 }
1658 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1659 }
1660
1661 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1662 return;
1663}
1664# endif /* !IN_RING3 */
1665
1666/**
1667 * Checks if the specified page is dirty (not write monitored).
1668 *
1669 * @returns true if dirty, false if not.
1670 * @param pVM VM Handle.
1671 * @param GCPhys Guest physical address.
1672 */
1673bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1674{
1675 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1676 Assert(PGMIsLocked(pVM));
1677 if (!pPool->cDirtyPages)
1678 return false;
1679
1680 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1681
1682 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1683 {
1684 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1685 {
1686 PPGMPOOLPAGE pPage;
1687 unsigned idxPage = pPool->aIdxDirtyPages[i];
1688
1689 pPage = &pPool->aPages[idxPage];
1690 if (pPage->GCPhys == GCPhys)
1691 return true;
1692 }
1693 }
1694 return false;
1695}
1696
1697/**
1698 * Resets all dirty pages by reinstating page monitoring.
1699 *
1700 * @param pVM VM Handle.
1701 */
1702void pgmPoolResetDirtyPages(PVM pVM)
1703{
1704 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1705 Assert(PGMIsLocked(pVM));
1706 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1707
1708 if (!pPool->cDirtyPages)
1709 return;
1710
1711 Log(("pgmPoolResetDirtyPages\n"));
1712 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1713 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1714
1715 pPool->idxFreeDirtyPage = 0;
1716 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1717 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1718 {
1719 unsigned i;
1720 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1721 {
1722 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1723 {
1724 pPool->idxFreeDirtyPage = i;
1725 break;
1726 }
1727 }
1728 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1729 }
1730
1731 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1732 return;
1733}
1734
1735/**
1736 * Invalidates the dirty page tracking for the specified page table, flushing it if found.
1737 *
1738 * @param pVM VM Handle.
1739 * @param GCPhysPT Physical address of the page table
1740 */
1741void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1742{
1743 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1744 Assert(PGMIsLocked(pVM));
1745 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1746 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aIdxDirtyPages);
1747
1748 if (!pPool->cDirtyPages)
1749 return;
1750
1751 GCPhysPT = GCPhysPT & ~(RTGCPHYS)(PAGE_SIZE - 1);
1752
1753 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1754 {
1755 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1756 {
1757 unsigned idxPage = pPool->aIdxDirtyPages[i];
1758
1759 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1760 if (pPage->GCPhys == GCPhysPT)
1761 {
1762 idxDirtyPage = i;
1763 break;
1764 }
1765 }
1766 }
1767
1768 if (idxDirtyPage != RT_ELEMENTS(pPool->aIdxDirtyPages))
1769 {
1770 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1771 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1772 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1773 {
1774 unsigned i;
1775 for (i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1776 {
1777 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1778 {
1779 pPool->idxFreeDirtyPage = i;
1780 break;
1781 }
1782 }
1783 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1784 }
1785 }
1786}
1787
1788# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1789
1790/**
1791 * Inserts a page into the GCPhys hash table.
1792 *
1793 * @param pPool The pool.
1794 * @param pPage The page.
1795 */
1796DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1797{
1798 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1799 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1800 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1801 pPage->iNext = pPool->aiHash[iHash];
1802 pPool->aiHash[iHash] = pPage->idx;
1803}
1804
1805
1806/**
1807 * Removes a page from the GCPhys hash table.
1808 *
1809 * @param pPool The pool.
1810 * @param pPage The page.
1811 */
1812DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1813{
1814 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1815 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1816 if (pPool->aiHash[iHash] == pPage->idx)
1817 pPool->aiHash[iHash] = pPage->iNext;
1818 else
1819 {
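        /* The page is not at the head of the hash chain: walk the chain to find and unlink it. */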
1820 uint16_t iPrev = pPool->aiHash[iHash];
1821 for (;;)
1822 {
1823 const int16_t i = pPool->aPages[iPrev].iNext;
1824 if (i == pPage->idx)
1825 {
1826 pPool->aPages[iPrev].iNext = pPage->iNext;
1827 break;
1828 }
1829 if (i == NIL_PGMPOOL_IDX)
1830 {
1831 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1832 break;
1833 }
1834 iPrev = i;
1835 }
1836 }
1837 pPage->iNext = NIL_PGMPOOL_IDX;
1838}
1839
1840
1841/**
1842 * Frees up one cache page.
1843 *
1844 * @returns VBox status code.
1845 * @retval VINF_SUCCESS on success.
1846 * @param pPool The pool.
1847 * @param iUser The user index.
1848 */
1849static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1850{
1851#ifndef IN_RC
1852 const PVM pVM = pPool->CTX_SUFF(pVM);
1853#endif
1854 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1855 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1856
1857 /*
1858 * Select one page from the tail of the age list.
1859 */
1860 PPGMPOOLPAGE pPage;
1861 for (unsigned iLoop = 0; ; iLoop++)
1862 {
1863 uint16_t iToFree = pPool->iAgeTail;
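        /* Never pick the page the caller (iUser) is allocating for; take the next oldest entry instead. */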
1864 if (iToFree == iUser)
1865 iToFree = pPool->aPages[iToFree].iAgePrev;
1866/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1867 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1868 {
1869 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1870 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1871 {
1872 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1873 continue;
1874 iToFree = i;
1875 break;
1876 }
1877 }
1878*/
1879 Assert(iToFree != iUser);
1880 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1881 pPage = &pPool->aPages[iToFree];
1882
1883 /*
1884 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1885 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1886 */
1887 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1888 break;
1889 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1890 pgmPoolCacheUsed(pPool, pPage);
1891 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1892 }
1893
1894 /*
1895 * Found a usable page, flush it and return.
1896 */
1897 int rc = pgmPoolFlushPage(pPool, pPage);
1898 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1899 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
1900 if (rc == VINF_SUCCESS)
1901 PGM_INVL_ALL_VCPU_TLBS(pVM);
1902 return rc;
1903}
1904
1905
1906/**
1907 * Checks if a kind mismatch is really a page being reused
1908 * or if it's just normal remappings.
1909 *
1910 * @returns true if reused and the cached page (enmKind1) should be flushed
1911 * @returns false if not reused.
1912 * @param enmKind1 The kind of the cached page.
1913 * @param enmKind2 The kind of the requested page.
1914 */
1915static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1916{
1917 switch (enmKind1)
1918 {
1919 /*
1920 * Never reuse them. There is no remapping in non-paging mode.
1921 */
1922 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1923 case PGMPOOLKIND_32BIT_PD_PHYS:
1924 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1925 case PGMPOOLKIND_PAE_PD_PHYS:
1926 case PGMPOOLKIND_PAE_PDPT_PHYS:
1927 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1928 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1929 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1930 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1931 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1932 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1933 return false;
1934
1935 /*
1936 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1937 */
1938 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1939 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1940 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1941 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1942 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1943 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1944 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1945 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1946 case PGMPOOLKIND_32BIT_PD:
1947 case PGMPOOLKIND_PAE_PDPT:
1948 switch (enmKind2)
1949 {
1950 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1951 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1952 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1953 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1954 case PGMPOOLKIND_64BIT_PML4:
1955 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1956 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1957 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1958 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1959 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1960 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1961 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1962 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1963 return true;
1964 default:
1965 return false;
1966 }
1967
1968 /*
1969 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1970 */
1971 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1972 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1973 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1974 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1975 case PGMPOOLKIND_64BIT_PML4:
1976 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1977 switch (enmKind2)
1978 {
1979 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1980 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1981 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1982 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1983 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1984 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1985 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1986 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1987 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1988 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1989 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1990 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1991 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1992 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1993 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1994 return true;
1995 default:
1996 return false;
1997 }
1998
1999 /*
2000 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2001 */
2002 case PGMPOOLKIND_ROOT_NESTED:
2003 return false;
2004
2005 default:
2006 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2007 }
2008}
2009
2010
2011/**
2012 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2013 *
2014 * @returns VBox status code.
2015 * @retval VINF_PGM_CACHED_PAGE on success.
2016 * @retval VERR_FILE_NOT_FOUND if not found.
2017 * @param pPool The pool.
2018 * @param GCPhys The GC physical address of the page we're going to shadow.
2019 * @param enmKind The kind of mapping.
2020 * @param enmAccess Access type for the mapping (only relevant for big pages)
2021 * @param iUser The shadow page pool index of the user table.
2022 * @param iUserTable The index into the user table (shadowed).
2023 * @param ppPage Where to store the pointer to the page.
2024 */
2025static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2026{
2027#ifndef IN_RC
2028 const PVM pVM = pPool->CTX_SUFF(pVM);
2029#endif
2030 /*
2031 * Look up the GCPhys in the hash.
2032 */
2033 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2034 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2035 if (i != NIL_PGMPOOL_IDX)
2036 {
2037 do
2038 {
2039 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2040 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2041 if (pPage->GCPhys == GCPhys)
2042 {
2043 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2044 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2045 {
2046 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2047 * doesn't flush it in case there are no more free use records.
2048 */
2049 pgmPoolCacheUsed(pPool, pPage);
2050
2051 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2052 if (RT_SUCCESS(rc))
2053 {
2054 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2055 *ppPage = pPage;
2056 if (pPage->cModifications)
2057 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2058 STAM_COUNTER_INC(&pPool->StatCacheHits);
2059 return VINF_PGM_CACHED_PAGE;
2060 }
2061 return rc;
2062 }
2063
2064 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2065 {
2066 /*
2067 * The kind is different. In some cases we should now flush the page
2068 * as it has been reused, but in most cases this is normal remapping
2069 * of PDs as PT or big pages using the GCPhys field in a slightly
2070 * different way than the other kinds.
2071 */
2072 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2073 {
2074 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2075 pgmPoolFlushPage(pPool, pPage);
2076 break;
2077 }
2078 }
2079 }
2080
2081 /* next */
2082 i = pPage->iNext;
2083 } while (i != NIL_PGMPOOL_IDX);
2084 }
2085
2086 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2087 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2088 return VERR_FILE_NOT_FOUND;
2089}
2090
2091
2092/**
2093 * Inserts a page into the cache.
2094 *
2095 * @param pPool The pool.
2096 * @param pPage The cached page.
2097 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2098 */
2099static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2100{
2101 /*
2102 * Insert into the GCPhys hash if the page is fit for that.
2103 */
2104 Assert(!pPage->fCached);
2105 if (fCanBeCached)
2106 {
2107 pPage->fCached = true;
2108 pgmPoolHashInsert(pPool, pPage);
2109 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2110 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2111 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2112 }
2113 else
2114 {
2115 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2116 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2117 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2118 }
2119
2120 /*
2121 * Insert at the head of the age list.
2122 */
2123 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2124 pPage->iAgeNext = pPool->iAgeHead;
2125 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2126 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2127 else
2128 pPool->iAgeTail = pPage->idx;
2129 pPool->iAgeHead = pPage->idx;
2130}
2131
2132
2133/**
2134 * Flushes a cached page.
2135 *
2136 * @param pPool The pool.
2137 * @param pPage The cached page.
2138 */
2139static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2140{
2141 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2142
2143 /*
2144 * Remove the page from the hash.
2145 */
2146 if (pPage->fCached)
2147 {
2148 pPage->fCached = false;
2149 pgmPoolHashRemove(pPool, pPage);
2150 }
2151 else
2152 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2153
2154 /*
2155 * Remove it from the age list.
2156 */
2157 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2158 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2159 else
2160 pPool->iAgeTail = pPage->iAgePrev;
2161 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2162 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2163 else
2164 pPool->iAgeHead = pPage->iAgeNext;
2165 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2166 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2167}
2168
2169
2170/**
2171 * Looks for pages sharing the monitor.
2172 *
2173 * @returns Pointer to the head page.
2174 * @returns NULL if not found.
2175 * @param pPool The pool.
2176 * @param pNewPage The page which is going to be monitored.
2177 */
2178static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2179{
2180 /*
2181 * Look up the GCPhys in the hash.
2182 */
2183 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2184 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2185 if (i == NIL_PGMPOOL_IDX)
2186 return NULL;
2187 do
2188 {
2189 PPGMPOOLPAGE pPage = &pPool->aPages[i];
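        /* Match any other pool page whose GCPhys lies within the same guest page (the unsigned subtraction rejects lower addresses). */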
2190 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2191 && pPage != pNewPage)
2192 {
2193 switch (pPage->enmKind)
2194 {
2195 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2196 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2197 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2198 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2199 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2200 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2201 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2202 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2203 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2204 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2205 case PGMPOOLKIND_64BIT_PML4:
2206 case PGMPOOLKIND_32BIT_PD:
2207 case PGMPOOLKIND_PAE_PDPT:
2208 {
2209 /* find the head */
2210 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2211 {
2212 Assert(pPage->iMonitoredPrev != pPage->idx);
2213 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2214 }
2215 return pPage;
2216 }
2217
2218 /* ignore, no monitoring. */
2219 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2220 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2221 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2222 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2223 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2224 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2225 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2226 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2227 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2228 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2229 case PGMPOOLKIND_ROOT_NESTED:
2230 case PGMPOOLKIND_PAE_PD_PHYS:
2231 case PGMPOOLKIND_PAE_PDPT_PHYS:
2232 case PGMPOOLKIND_32BIT_PD_PHYS:
2233 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2234 break;
2235 default:
2236 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2237 }
2238 }
2239
2240 /* next */
2241 i = pPage->iNext;
2242 } while (i != NIL_PGMPOOL_IDX);
2243 return NULL;
2244}
2245
2246
2247/**
2248 * Enables write monitoring of a guest page.
2249 *
2250 * @returns VBox status code.
2251 * @retval VINF_SUCCESS on success.
2252 * @param pPool The pool.
2253 * @param pPage The cached page.
2254 */
2255static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2256{
2257 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2258
2259 /*
2260 * Filter out the relevant kinds.
2261 */
2262 switch (pPage->enmKind)
2263 {
2264 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2265 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2266 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2267 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2268 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2269 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2270 case PGMPOOLKIND_64BIT_PML4:
2271 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2272 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2273 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2274 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2275 case PGMPOOLKIND_32BIT_PD:
2276 case PGMPOOLKIND_PAE_PDPT:
2277 break;
2278
2279 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2280 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2281 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2282 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2283 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2284 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2285 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2286 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2287 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2288 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2289 case PGMPOOLKIND_ROOT_NESTED:
2290 /* Nothing to monitor here. */
2291 return VINF_SUCCESS;
2292
2293 case PGMPOOLKIND_32BIT_PD_PHYS:
2294 case PGMPOOLKIND_PAE_PDPT_PHYS:
2295 case PGMPOOLKIND_PAE_PD_PHYS:
2296 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2297 /* Nothing to monitor here. */
2298 return VINF_SUCCESS;
2299 default:
2300 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2301 }
2302
2303 /*
2304 * Install handler.
2305 */
2306 int rc;
2307 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2308 if (pPageHead)
2309 {
2310 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2311 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2312
2313#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2314 if (pPageHead->fDirty)
2315 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2316#endif
2317
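        /* Share the already registered handler: link this page into the monitored chain right after the head page. */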
2318 pPage->iMonitoredPrev = pPageHead->idx;
2319 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2320 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2321 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2322 pPageHead->iMonitoredNext = pPage->idx;
2323 rc = VINF_SUCCESS;
2324 }
2325 else
2326 {
2327 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2328 PVM pVM = pPool->CTX_SUFF(pVM);
2329 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2330 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2331 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2332 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2333 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2334 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2335 pPool->pszAccessHandler);
2336 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2337 * the heap size should suffice. */
2338 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2339 PVMCPU pVCpu = VMMGetCpu(pVM);
2340 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2341 }
2342 pPage->fMonitored = true;
2343 return rc;
2344}
2345
2346
2347/**
2348 * Disables write monitoring of a guest page.
2349 *
2350 * @returns VBox status code.
2351 * @retval VINF_SUCCESS on success.
2352 * @param pPool The pool.
2353 * @param pPage The cached page.
2354 */
2355static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2356{
2357 /*
2358 * Filter out the relevant kinds.
2359 */
2360 switch (pPage->enmKind)
2361 {
2362 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2363 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2364 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2365 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2366 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2367 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2368 case PGMPOOLKIND_64BIT_PML4:
2369 case PGMPOOLKIND_32BIT_PD:
2370 case PGMPOOLKIND_PAE_PDPT:
2371 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2372 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2373 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2374 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2375 break;
2376
2377 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2378 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2379 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2380 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2381 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2382 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2383 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2384 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2385 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2386 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2387 case PGMPOOLKIND_ROOT_NESTED:
2388 case PGMPOOLKIND_PAE_PD_PHYS:
2389 case PGMPOOLKIND_PAE_PDPT_PHYS:
2390 case PGMPOOLKIND_32BIT_PD_PHYS:
2391 /* Nothing to monitor here. */
2392 Assert(!pPage->fMonitored);
2393 return VINF_SUCCESS;
2394
2395 default:
2396 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2397 }
2398 Assert(pPage->fMonitored);
2399
2400 /*
2401 * Remove the page from the monitored list or uninstall it if last.
2402 */
2403 const PVM pVM = pPool->CTX_SUFF(pVM);
2404 int rc;
2405 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2406 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2407 {
2408 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2409 {
2410 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2411 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2412 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2413 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2414 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2415 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2416 pPool->pszAccessHandler);
2417 AssertFatalRCSuccess(rc);
2418 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2419 }
2420 else
2421 {
2422 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2423 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2424 {
2425 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2426 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2427 }
2428 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2429 rc = VINF_SUCCESS;
2430 }
2431 }
2432 else
2433 {
2434 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2435 AssertFatalRC(rc);
2436 PVMCPU pVCpu = VMMGetCpu(pVM);
2437 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2438 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2439 }
2440 pPage->fMonitored = false;
2441
2442 /*
2443 * Remove it from the list of modified pages (if in it).
2444 */
2445 pgmPoolMonitorModifiedRemove(pPool, pPage);
2446
2447 return rc;
2448}
2449
2450
2451/**
2452 * Inserts the page into the list of modified pages.
2453 *
2454 * @param pPool The pool.
2455 * @param pPage The page.
2456 */
2457void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2458{
2459 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2460 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2461 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2462 && pPool->iModifiedHead != pPage->idx,
2463 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2464 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2465 pPool->iModifiedHead, pPool->cModifiedPages));
2466
2467 pPage->iModifiedNext = pPool->iModifiedHead;
2468 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2469 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2470 pPool->iModifiedHead = pPage->idx;
2471 pPool->cModifiedPages++;
2472#ifdef VBOX_WITH_STATISTICS
2473 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2474 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2475#endif
2476}
2477
2478
2479/**
2480 * Removes the page from the list of modified pages and resets the
2481 * modification counter.
2482 *
2483 * @param pPool The pool.
2484 * @param pPage The page which is believed to be in the list of modified pages.
2485 */
2486static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2487{
2488 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2489 if (pPool->iModifiedHead == pPage->idx)
2490 {
2491 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2492 pPool->iModifiedHead = pPage->iModifiedNext;
2493 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2494 {
2495 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2496 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2497 }
2498 pPool->cModifiedPages--;
2499 }
2500 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2501 {
2502 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2503 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2504 {
2505 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2506 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2507 }
2508 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2509 pPool->cModifiedPages--;
2510 }
2511 else
2512 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2513 pPage->cModifications = 0;
2514}
2515
2516
2517/**
2518 * Zaps the list of modified pages, resetting their modification counters in the process.
2519 *
2520 * @param pVM The VM handle.
2521 */
2522static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2523{
2524 pgmLock(pVM);
2525 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2526 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2527
2528 unsigned cPages = 0; NOREF(cPages);
2529
2530#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2531 pgmPoolResetDirtyPages(pVM);
2532#endif
2533
2534 uint16_t idx = pPool->iModifiedHead;
2535 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2536 while (idx != NIL_PGMPOOL_IDX)
2537 {
2538 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2539 idx = pPage->iModifiedNext;
2540 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2541 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2542 pPage->cModifications = 0;
2543 Assert(++cPages);
2544 }
2545 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2546 pPool->cModifiedPages = 0;
2547 pgmUnlock(pVM);
2548}
2549
2550
2551/**
2552 * Handles SyncCR3 pool tasks.
2553 *
2554 * @returns VBox status code.
2555 * @retval VINF_SUCCESS on success.
2556 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2557 * @param pVCpu The VMCPU handle.
2558 * @remark Should only be used when monitoring is available, thus placed in
2559 * the PGMPOOL_WITH_MONITORING #ifdef.
2560 */
2561int pgmPoolSyncCR3(PVMCPU pVCpu)
2562{
2563 PVM pVM = pVCpu->CTX_SUFF(pVM);
2564 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2565
2566 /*
2567 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2568 * Occasionally we will have to clear all the shadow page tables because we wanted
2569 * to monitor a page which was mapped by too many shadowed page tables. This operation
2570 * is sometimes referred to as a 'lightweight flush'.
2571 */
2572# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2573 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2574 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2575# else /* !IN_RING3 */
2576 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2577 {
2578 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2579 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2580
2581 /* Make sure all other VCPUs return to ring 3. */
2582 if (pVM->cCpus > 1)
2583 {
2584 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2585 PGM_INVL_ALL_VCPU_TLBS(pVM);
2586 }
2587 return VINF_PGM_SYNC_CR3;
2588 }
2589# endif /* !IN_RING3 */
2590 else
2591 {
2592 pgmPoolMonitorModifiedClearAll(pVM);
2593
2594 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2595 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2596 {
2597 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2598 return pgmPoolSyncCR3(pVCpu);
2599 }
2600 }
2601 return VINF_SUCCESS;
2602}
2603
2604
2605/**
2606 * Frees up at least one user entry.
2607 *
2608 * @returns VBox status code.
2609 * @retval VINF_SUCCESS on success.
2610 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2611 * @param pPool The pool.
2612 * @param iUser The user index.
2613 */
2614static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2615{
2616 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2617 /*
2618 * Just free cached pages in a braindead fashion.
2619 */
2620 /** @todo walk the age list backwards and free the first with usage. */
2621 int rc = VINF_SUCCESS;
2622 do
2623 {
2624 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2625 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2626 rc = rc2;
2627 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2628 return rc;
2629}
2630
2631
2632/**
2633 * Inserts a page into the cache.
2634 *
2635 * This will create a user node for the page, insert it into the GCPhys
2636 * hash, and insert it into the age list.
2637 *
2638 * @returns VBox status code.
2639 * @retval VINF_SUCCESS if successfully added.
2640 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2641 * @param pPool The pool.
2642 * @param pPage The cached page.
2643 * @param GCPhys The GC physical address of the page we're going to shadow.
2644 * @param iUser The user index.
2645 * @param iUserTable The user table index.
2646 */
2647DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2648{
2649 int rc = VINF_SUCCESS;
2650 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2651
2652 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2653
2654#ifdef VBOX_STRICT
2655 /*
2656 * Check that the entry doesn't already exist.
2657 */
2658 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2659 {
2660 uint16_t i = pPage->iUserHead;
2661 do
2662 {
2663 Assert(i < pPool->cMaxUsers);
2664 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2665 i = paUsers[i].iNext;
2666 } while (i != NIL_PGMPOOL_USER_INDEX);
2667 }
2668#endif
2669
2670 /*
2671 * Find a free user node.
2672 */
2673 uint16_t i = pPool->iUserFreeHead;
2674 if (i == NIL_PGMPOOL_USER_INDEX)
2675 {
2676 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2677 if (RT_FAILURE(rc))
2678 return rc;
2679 i = pPool->iUserFreeHead;
2680 }
2681
2682 /*
2683 * Unlink the user node from the free list,
2684 * initialize and insert it into the user list.
2685 */
2686 pPool->iUserFreeHead = paUsers[i].iNext;
2687 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2688 paUsers[i].iUser = iUser;
2689 paUsers[i].iUserTable = iUserTable;
2690 pPage->iUserHead = i;
2691
2692 /*
2693 * Insert into cache and enable monitoring of the guest page if enabled.
2694 *
2695 * Until we implement caching of all levels, including the CR3 one, we'll
2696 * have to make sure we don't try to monitor & cache any recursive reuse of
2697 * a monitored CR3 page. Because all Windows versions are doing this we'll
2698 * have to be able to do combined access monitoring, CR3 + PT and
2699 * PD + PT (guest PAE).
2700 *
2701 * Update:
2702 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2703 */
2704 const bool fCanBeMonitored = true;
2705 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2706 if (fCanBeMonitored)
2707 {
2708 rc = pgmPoolMonitorInsert(pPool, pPage);
2709 AssertRC(rc);
2710 }
2711 return rc;
2712}
2713
2714
2715/**
2716 * Adds a user reference to a page.
2717 *
2718 * This will move the page to the head of the age list.
2719 *
2720 * @returns VBox status code.
2721 * @retval VINF_SUCCESS if successfully added.
2722 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2723 * @param pPool The pool.
2724 * @param pPage The cached page.
2725 * @param iUser The user index.
2726 * @param iUserTable The user table.
2727 */
2728static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2729{
2730 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2731
2732 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2733
2734# ifdef VBOX_STRICT
2735 /*
2736 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2737 */
2738 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2739 {
2740 uint16_t i = pPage->iUserHead;
2741 do
2742 {
2743 Assert(i < pPool->cMaxUsers);
2744 AssertMsg(iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3 ||
2745 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2746 i = paUsers[i].iNext;
2747 } while (i != NIL_PGMPOOL_USER_INDEX);
2748 }
2749# endif
2750
2751 /*
2752 * Allocate a user node.
2753 */
2754 uint16_t i = pPool->iUserFreeHead;
2755 if (i == NIL_PGMPOOL_USER_INDEX)
2756 {
2757 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2758 if (RT_FAILURE(rc))
2759 return rc;
2760 i = pPool->iUserFreeHead;
2761 }
2762 pPool->iUserFreeHead = paUsers[i].iNext;
2763
2764 /*
2765 * Initialize the user node and insert it.
2766 */
2767 paUsers[i].iNext = pPage->iUserHead;
2768 paUsers[i].iUser = iUser;
2769 paUsers[i].iUserTable = iUserTable;
2770 pPage->iUserHead = i;
2771
2772# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2773 if (pPage->fDirty)
2774 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2775# endif
2776
2777 /*
2778 * Tell the cache to update its replacement stats for this page.
2779 */
2780 pgmPoolCacheUsed(pPool, pPage);
2781 return VINF_SUCCESS;
2782}
2783
2784
2785/**
2786 * Frees a user record associated with a page.
2787 *
2788 * This does not clear the entry in the user table; it simply returns the
2789 * user record to the chain of free records.
2790 *
2791 * @param pPool The pool.
2792 * @param pPage The shadow page.
2793 * @param iUser The shadow page pool index of the user table.
2794 * @param iUserTable The index into the user table (shadowed).
2795 */
2796static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2797{
2798 /*
2799 * Unlink and free the specified user entry.
2800 */
2801 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2802
2803 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2804 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2805 uint16_t i = pPage->iUserHead;
2806 if ( i != NIL_PGMPOOL_USER_INDEX
2807 && paUsers[i].iUser == iUser
2808 && paUsers[i].iUserTable == iUserTable)
2809 {
2810 pPage->iUserHead = paUsers[i].iNext;
2811
2812 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2813 paUsers[i].iNext = pPool->iUserFreeHead;
2814 pPool->iUserFreeHead = i;
2815 return;
2816 }
2817
2818 /* General: Linear search. */
2819 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2820 while (i != NIL_PGMPOOL_USER_INDEX)
2821 {
2822 if ( paUsers[i].iUser == iUser
2823 && paUsers[i].iUserTable == iUserTable)
2824 {
2825 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2826 paUsers[iPrev].iNext = paUsers[i].iNext;
2827 else
2828 pPage->iUserHead = paUsers[i].iNext;
2829
2830 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2831 paUsers[i].iNext = pPool->iUserFreeHead;
2832 pPool->iUserFreeHead = i;
2833 return;
2834 }
2835 iPrev = i;
2836 i = paUsers[i].iNext;
2837 }
2838
2839 /* Fatal: didn't find it */
2840 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2841 iUser, iUserTable, pPage->GCPhys));
2842}
2843
2844
2845/**
2846 * Gets the entry size of a shadow table.
2847 *
2848 * @param enmKind The kind of page.
2849 *
2850 * @returns The size of the entry in bytes. That is, 4 or 8.
2851 * @returns If the kind is not for a table, an assertion is raised and 0 is
2852 * returned.
2853 */
2854DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2855{
2856 switch (enmKind)
2857 {
2858 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2859 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2860 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2861 case PGMPOOLKIND_32BIT_PD:
2862 case PGMPOOLKIND_32BIT_PD_PHYS:
2863 return 4;
2864
2865 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2866 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2867 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2868 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2869 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2870 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2871 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2872 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2873 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2874 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2875 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2876 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2877 case PGMPOOLKIND_64BIT_PML4:
2878 case PGMPOOLKIND_PAE_PDPT:
2879 case PGMPOOLKIND_ROOT_NESTED:
2880 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2881 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2882 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2883 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2884 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2885 case PGMPOOLKIND_PAE_PD_PHYS:
2886 case PGMPOOLKIND_PAE_PDPT_PHYS:
2887 return 8;
2888
2889 default:
2890 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2891 }
2892}
2893
2894
2895/**
2896 * Gets the entry size of a guest table.
2897 *
2898 * @param enmKind The kind of page.
2899 *
2900 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2901 * @returns If the kind is not for a table, an assertion is raised and 0 is
2902 * returned.
2903 */
2904DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2905{
2906 switch (enmKind)
2907 {
2908 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2909 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2910 case PGMPOOLKIND_32BIT_PD:
2911 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2912 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2913 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2914 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2915 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2916 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2917 return 4;
2918
2919 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2920 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2921 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2922 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2923 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2924 case PGMPOOLKIND_64BIT_PML4:
2925 case PGMPOOLKIND_PAE_PDPT:
2926 return 8;
2927
2928 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2929 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2930 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2931 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2932 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2933 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2934 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2935 case PGMPOOLKIND_ROOT_NESTED:
2936 case PGMPOOLKIND_PAE_PD_PHYS:
2937 case PGMPOOLKIND_PAE_PDPT_PHYS:
2938 case PGMPOOLKIND_32BIT_PD_PHYS:
2939 /** @todo can we return 0? (nobody is calling this...) */
2940 AssertFailed();
2941 return 0;
2942
2943 default:
2944 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2945 }
2946}
2947
2948
2949/**
2950 * Scans one shadow page table for mappings of a physical page.
2951 *
2952 * @returns true if some PTEs were kept (only updated), false if all relevant PTEs were removed.
2953 * @param pVM The VM handle.
2954 * @param pPhysPage The guest page in question.
2955 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
2956 * @param iShw The shadow page table.
2957 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
2958 * @param cRefs The number of references made in that PT.
2959 */
2960static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte, uint16_t cRefs)
2961{
2962 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d iPte=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte, cRefs));
2963 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2964 bool fRet = false;
2965
2966 /*
2967 * Assert sanity.
2968 */
2969 Assert(cRefs == 1);
2970 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
2971 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2972 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2973
2974 /*
2975 * Then, clear the actual mappings to the page in the shadow PT.
2976 */
2977 switch (pPage->enmKind)
2978 {
2979 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2980 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2981 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2982 {
2983 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2984 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2985 uint32_t u32AndMask = 0;
2986 uint32_t u32OrMask = 0;
2987
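            /* When updates are allowed, try to keep the PTE: restore write access if no write handler is active, or strip it while writes are monitored. */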
2988 if (!fFlushPTEs)
2989 {
2990 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
2991 {
2992 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
2993 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
2994 u32OrMask = X86_PTE_RW;
2995 u32AndMask = UINT32_MAX;
2996 fRet = true;
2997 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2998 break;
2999
3000 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3001 u32OrMask = 0;
3002 u32AndMask = ~X86_PTE_RW;
3003 fRet = true;
3004 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3005 break;
3006 default:
3007 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3008 break;
3009 }
3010 }
3011 else
3012 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3013
3014 /* Update the counter if we're removing references. */
3015 if (!u32AndMask)
3016 {
3017 Assert(pPage->cPresent >= cRefs);
3018 Assert(pPool->cPresent >= cRefs);
3019 pPage->cPresent -= cRefs;
3020 pPool->cPresent -= cRefs;
3021 }
3022
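            /* Only modify or clear the entry if it still maps the physical page in question. */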
3023 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3024 {
3025 X86PTE Pte;
3026
3027 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", iPte, pPT->a[iPte], cRefs));
3028 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3029 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3030 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3031
3032 ASMAtomicWriteSize(&pPT->a[iPte].u, Pte.u);
3033 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3034 return fRet;
3035 }
3036#ifdef LOG_ENABLED
3037 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3038 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3039 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3040 {
3041 Log(("i=%d cRefs=%d\n", i, cRefs--));
3042 }
3043#endif
3044 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3045 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3046 break;
3047 }
3048
3049 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3050 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3051 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3052 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3053 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3054 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3055 {
3056 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3057 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3058 uint64_t u64OrMask = 0;
3059 uint64_t u64AndMask = 0;
3060
3061 if (!fFlushPTEs)
3062 {
3063 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3064 {
3065 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3066 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3067 u64OrMask = X86_PTE_RW;
3068 u64AndMask = UINT64_MAX;
3069 fRet = true;
3070 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3071 break;
3072
3073 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3074 u64OrMask = 0;
3075 u64AndMask = ~((uint64_t)X86_PTE_RW);
3076 fRet = true;
3077 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3078 break;
3079
3080 default:
3081 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3082 break;
3083 }
3084 }
3085 else
3086 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3087
3088 /* Update the counter if we're removing references. */
3089 if (!u64AndMask)
3090 {
3091 Assert(pPage->cPresent >= cRefs);
3092 Assert(pPool->cPresent >= cRefs);
3093 pPage->cPresent -= cRefs;
3094 pPool->cPresent -= cRefs;
3095 }
3096
3097 if ((pPT->a[iPte].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3098 {
3099 X86PTEPAE Pte;
3100
3101 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", iPte, pPT->a[iPte], cRefs));
3102 Pte.u = (pPT->a[iPte].u & u64AndMask) | u64OrMask;
3103 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3104 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3105
3106 ASMAtomicWriteSize(&pPT->a[iPte].u, Pte.u);
3107 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3108 return fRet;
3109 }
3110#ifdef LOG_ENABLED
3111 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3112 Log(("Found %RX64 expected %RX64\n", pPT->a[iPte].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P), u64));
3113 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3114 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3115 {
3116 Log(("i=%d cRefs=%d\n", i, cRefs--));
3117 }
3118#endif
3119 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind));
3120 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3121 break;
3122 }
3123
3124#ifdef PGM_WITH_LARGE_PAGES
3125 /* Large page case only. */
3126 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3127 {
3128 Assert(pVM->pgm.s.fNestedPaging);
3129
3130 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3131 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3132
3133 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3134 {
3135 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", iPte, pPD->a[iPte], cRefs));
3136 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3137 pPD->a[iPte].u = 0;
3138 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3139
3140 /* Update the counter as we're removing references. */
3141 Assert(pPage->cPresent);
3142 Assert(pPool->cPresent);
3143 pPage->cPresent--;
3144 pPool->cPresent--;
3145
3146 return fRet;
3147 }
3148# ifdef LOG_ENABLED
3149 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3150 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3151 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3152 {
3153 Log(("i=%d cRefs=%d\n", i, cRefs--));
3154 }
3155# endif
3156 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3157 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3158 break;
3159 }
3160
3161 /* AMD-V nested paging - @todo merge with EPT as we only check the parts that are identical. */
3162 case PGMPOOLKIND_PAE_PD_PHYS:
3163 {
3164 Assert(pVM->pgm.s.fNestedPaging);
3165
3166 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3167 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3168
3169 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3170 {
3171 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", iPte, pPD->a[iPte], cRefs));
3172 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3173 pPD->a[iPte].u = 0;
3174 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3175
3176 /* Update the counter as we're removing references. */
3177 Assert(pPage->cPresent);
3178 Assert(pPool->cPresent);
3179 pPage->cPresent--;
3180 pPool->cPresent--;
3181 return fRet;
3182 }
3183# ifdef LOG_ENABLED
3184 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3185 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3186 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3187 {
3188 Log(("i=%d cRefs=%d\n", i, cRefs--));
3189 }
3190# endif
3191 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3192 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3193 break;
3194 }
3195#endif /* PGM_WITH_LARGE_PAGES */
3196
3197 default:
3198 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3199 }
3200 return fRet;
3201}
3202
3203
3204/**
3205 * Scans one shadow page table for mappings of a physical page.
3206 *
3207 * @param pVM The VM handle.
3208 * @param pPhysPage The guest page in question.
3209 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3210 * @param iShw The shadow page table.
3211 * @param cRefs The number of references made in that PT.
3212 */
3213static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
3214{
3215 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3216
3217 /* We should only come here when there's only one reference to this physical page. */
3218 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3219 Assert(cRefs == 1);
3220
3221 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
3222 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3223 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage), cRefs);
3224 if (!fKeptPTEs)
3225 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3226 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3227}
3228
3229
3230/**
3231 * Flushes a list of shadow page tables mapping the same physical page.
3232 *
3233 * @param pVM The VM handle.
3234 * @param pPhysPage The guest page in question.
3235 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3236 * @param iPhysExt The physical cross reference extent list to flush.
3237 */
3238static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3239{
3240 Assert(PGMIsLockOwner(pVM));
3241 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3242 bool fKeepList = false;
3243
3244 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3245 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3246
3247 const uint16_t iPhysExtStart = iPhysExt;
3248 PPGMPOOLPHYSEXT pPhysExt;
3249 do
3250 {
3251 Assert(iPhysExt < pPool->cMaxPhysExts);
3252 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3253 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3254 {
3255 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3256 {
3257 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i], 1);
3258 if (!fKeptPTEs)
3259 {
3260 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3261 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3262 }
3263 else
3264 fKeepList = true;
3265 }
3266 }
3267 /* next */
3268 iPhysExt = pPhysExt->iNext;
3269 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3270
3271 if (!fKeepList)
3272 {
3273 /* insert the list into the free list and clear the ram range entry. */
3274 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3275 pPool->iPhysExtFreeHead = iPhysExtStart;
3276 /* Invalidate the tracking data. */
3277 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3278 }
3279
3280 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3281}
3282
3283
3284/**
3285 * Flushes all shadow page table mappings of the given guest page.
3286 *
3287 * This is typically called when the host page backing the guest one has been
3288 * replaced or when the page protection was changed due to an access handler.
3289 *
3290 * @returns VBox status code.
3291 * @retval VINF_SUCCESS if all references have been successfully cleared.
3292 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3293 * pool cleaning. FF and sync flags are set.
3294 *
3295 * @param pVM The VM handle.
3296 * @param GCPhysPage GC physical address of the page in question
3297 * @param pPhysPage The guest page in question.
3298 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3299 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3300 * flushed; it is NOT touched if this isn't necessary.
3301 * The caller MUST initialize this to @a false.
3302 */
3303int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3304{
3305 PVMCPU pVCpu = VMMGetCpu(pVM);
3306 pgmLock(pVM);
3307 int rc = VINF_SUCCESS;
3308
3309#ifdef PGM_WITH_LARGE_PAGES
3310 /* Is this page part of a large page? */
3311 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3312 {
3313 PPGMPAGE pPhysBase;
3314 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3315
3316 GCPhysPage &= X86_PDE_PAE_PG_MASK;
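 /* GCPhysBase is the 2 MB aligned base of the large page; GCPhysPage is now the 4 KB aligned page actually being updated. */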
3317
3318 /* Fetch the large page base. */
3319 if (GCPhysBase != GCPhysPage)
3320 {
3321 pPhysBase = pgmPhysGetPage(&pVM->pgm.s, GCPhysBase);
3322 AssertFatal(pPhysBase);
3323 }
3324 else
3325 pPhysBase = pPhysPage;
3326
3327 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3328
3329 if (PGM_PAGE_GET_PDE_TYPE(pPhysBase) == PGM_PAGE_PDE_TYPE_PDE)
3330 {
3331 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3332 PGM_PAGE_SET_PDE_TYPE(pPhysBase, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3333
3334 /* Update the base; *only* that one has a reference and there's only one PDE to clear. */
3335 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pPhysBase, fFlushPTEs, pfFlushTLBs);
3336
3337 *pfFlushTLBs = true;
3338 pgmUnlock(pVM);
3339 return rc;
3340 }
3341 }
3342#else
3343 NOREF(GCPhysPage);
3344#endif /* PGM_WITH_LARGE_PAGES */
3345
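 /* The 16-bit tracking word packs a reference count and an index: either the shadow page table referencing the page,
    or, when the count equals PGMPOOL_TD_CREFS_PHYSEXT, the head of a physical cross reference extent list. */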
3346 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3347 if (u16)
3348 {
3349 /*
3350 * The zero page is currently screwing up the tracking and we'll
3351 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3352 * is defined, zero pages won't normally be mapped. Some kind of solution
3353 * will be needed for this problem of course, but it will have to wait...
3354 */
3355 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3356 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3357 rc = VINF_PGM_GCPHYS_ALIASED;
3358 else
3359 {
3360# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3361 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3362 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3363 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3364# endif
3365
3366 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3367 pgmPoolTrackFlushGCPhysPT(pVM,
3368 pPhysPage,
3369 fFlushPTEs,
3370 PGMPOOL_TD_GET_IDX(u16),
3371 PGMPOOL_TD_GET_CREFS(u16));
3372 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3373 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3374 else
3375 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3376 *pfFlushTLBs = true;
3377
3378# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3379 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3380# endif
3381 }
3382 }
3383
3384 if (rc == VINF_PGM_GCPHYS_ALIASED)
3385 {
3386 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3387 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3388 rc = VINF_PGM_SYNC_CR3;
3389 }
3390 pgmUnlock(pVM);
3391 return rc;
3392}
3393
3394
3395/**
3396 * Scans all shadow page tables for mappings of a physical page.
3397 *
3398 * This may be slow, but it's most likely more efficient than cleaning
3399 * out the entire page pool / cache.
3400 *
3401 * @returns VBox status code.
3402 * @retval VINF_SUCCESS if all references have been successfully cleared.
3403 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3404 * a page pool cleaning.
3405 *
3406 * @param pVM The VM handle.
3407 * @param pPhysPage The guest page in question.
3408 */
3409int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3410{
3411 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3412 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3413 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3414 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3415
3416 /*
3417 * There is a limit to what makes sense.
3418 */
3419 if ( pPool->cPresent > 1024
3420 && pVM->cCpus == 1)
3421 {
3422 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3423 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3424 return VINF_PGM_GCPHYS_ALIASED;
3425 }
3426
3427 /*
3428 * Iterate all the pages until we've encountered all those in use.
3429 * This is a simple but not quite optimal solution.
3430 */
3431 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3432 const uint32_t u32 = u64;
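 /* u64 is the entry pattern (HCPhys | P) we look for in PAE and EPT page tables; u32 is the same pattern
    truncated for legacy 32-bit page tables. */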
3433 unsigned cLeft = pPool->cUsedPages;
3434 unsigned iPage = pPool->cCurPages;
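 /* Walk the pool backwards; cLeft lets us stop as soon as every page currently in use has been visited. */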
3435 while (--iPage >= PGMPOOL_IDX_FIRST)
3436 {
3437 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3438 if ( pPage->GCPhys != NIL_RTGCPHYS
3439 && pPage->cPresent)
3440 {
3441 switch (pPage->enmKind)
3442 {
3443 /*
3444 * We only care about shadow page tables.
3445 */
3446 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3447 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3448 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3449 {
3450 unsigned cPresent = pPage->cPresent;
3451 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3452 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3453 if (pPT->a[i].n.u1Present)
3454 {
3455 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3456 {
3457 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3458 pPT->a[i].u = 0;
3459
3460 /* Update the counter as we're removing references. */
3461 Assert(pPage->cPresent);
3462 Assert(pPool->cPresent);
3463 pPage->cPresent--;
3464 pPool->cPresent--;
3465 }
3466 if (!--cPresent)
3467 break;
3468 }
3469 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3470 break;
3471 }
3472
3473 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3474 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3475 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3476 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3477 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3478 {
3479 unsigned cPresent = pPage->cPresent;
3480 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3481 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3482 if (PGM_POOL_IS_PAE_PTE_PRESENT(pPT->a[i]))
3483 {
3484 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3485 {
3486 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3487 pPT->a[i].u = 0;
3488
3489 /* Update the counter as we're removing references. */
3490 Assert(pPage->cPresent);
3491 Assert(pPool->cPresent);
3492 pPage->cPresent--;
3493 pPool->cPresent--;
3494 }
3495 if (!--cPresent)
3496 break;
3497 }
3498 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3499 break;
3500 }
3501#ifndef IN_RC
3502 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3503 {
3504 unsigned cPresent = pPage->cPresent;
3505 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3506 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3507 if (PGM_POOL_IS_EPT_PTE_PRESENT(pPT->a[i]))
3508 {
3509 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3510 {
3511 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3512 pPT->a[i].u = 0;
3513
3514 /* Update the counter as we're removing references. */
3515 Assert(pPage->cPresent);
3516 Assert(pPool->cPresent);
3517 pPage->cPresent--;
3518 pPool->cPresent--;
3519 }
3520 if (!--cPresent)
3521 break;
3522 }
3523 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3524 break;
3525 }
3526#endif
3527 }
3528 if (!--cLeft)
3529 break;
3530 }
3531 }
3532
3533 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3534 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3535
3536 /*
3537 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3538 */
3539 if (pPool->cPresent > 1024)
3540 {
3541 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3542 return VINF_PGM_GCPHYS_ALIASED;
3543 }
3544
3545 return VINF_SUCCESS;
3546}
3547
3548
3549/**
3550 * Clears the user entry in a user table.
3551 *
3552 * This is used to remove all references to a page when flushing it.
3553 */
3554static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3555{
3556 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3557 Assert(pUser->iUser < pPool->cCurPages);
3558 uint32_t iUserTable = pUser->iUserTable;
3559
3560 /*
3561 * Map the user page.
3562 */
3563 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
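 /* The user table is either a legacy 32-bit structure or a PAE/long-mode one, so map it as a union of both
    entry widths and pick the right one below based on the page kind. */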
3564 union
3565 {
3566 uint64_t *pau64;
3567 uint32_t *pau32;
3568 } u;
3569 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3570
3571 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3572
3573 /* Safety precaution in case we change the paging for other modes too in the future. */
3574 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3575
3576#ifdef VBOX_STRICT
3577 /*
3578 * Some sanity checks.
3579 */
3580 switch (pUserPage->enmKind)
3581 {
3582 case PGMPOOLKIND_32BIT_PD:
3583 case PGMPOOLKIND_32BIT_PD_PHYS:
3584 Assert(iUserTable < X86_PG_ENTRIES);
3585 break;
3586 case PGMPOOLKIND_PAE_PDPT:
3587 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3588 case PGMPOOLKIND_PAE_PDPT_PHYS:
3589 Assert(iUserTable < 4);
3590 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3591 break;
3592 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3593 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3594 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3595 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3596 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3597 case PGMPOOLKIND_PAE_PD_PHYS:
3598 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3599 break;
3600 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3601 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3602 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3603 break;
3604 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3605 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3606 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3607 break;
3608 case PGMPOOLKIND_64BIT_PML4:
3609 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3610 /* GCPhys >> PAGE_SHIFT is the index here */
3611 break;
3612 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3613 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3614 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3615 break;
3616
3617 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3618 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3619 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3620 break;
3621
3622 case PGMPOOLKIND_ROOT_NESTED:
3623 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3624 break;
3625
3626 default:
3627 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3628 break;
3629 }
3630#endif /* VBOX_STRICT */
3631
3632 /*
3633 * Clear the entry in the user page.
3634 */
3635 switch (pUserPage->enmKind)
3636 {
3637 /* 32-bit entries */
3638 case PGMPOOLKIND_32BIT_PD:
3639 case PGMPOOLKIND_32BIT_PD_PHYS:
3640 ASMAtomicWriteSize(&u.pau32[iUserTable], 0);
3641 break;
3642
3643 /* 64-bit entries */
3644 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3645 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3646 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3647 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3648 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3649#if defined(IN_RC)
3650 /*
3651 * In 32-bit PAE mode we *must* invalidate the TLB when changing a
3652 * PDPT entry; the CPU fetches them only during cr3 load, so any
3653 * non-present PDPT will continue to cause page faults.
3654 */
3655 ASMReloadCR3();
3656 /* no break */
3657#endif
3658 case PGMPOOLKIND_PAE_PD_PHYS:
3659 case PGMPOOLKIND_PAE_PDPT_PHYS:
3660 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3661 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3662 case PGMPOOLKIND_64BIT_PML4:
3663 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3664 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3665 case PGMPOOLKIND_PAE_PDPT:
3666 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3667 case PGMPOOLKIND_ROOT_NESTED:
3668 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3669 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3670 ASMAtomicWriteSize(&u.pau64[iUserTable], 0);
3671 break;
3672
3673 default:
3674 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3675 }
3676 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3677}
3678
3679
3680/**
3681 * Clears all users of a page.
3682 */
3683static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3684{
3685 /*
3686 * Free all the user records.
3687 */
3688 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3689
3690 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3691 uint16_t i = pPage->iUserHead;
3692 while (i != NIL_PGMPOOL_USER_INDEX)
3693 {
3694 /* Clear the entry in the user table. */
3695 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3696
3697 /* Free it. */
3698 const uint16_t iNext = paUsers[i].iNext;
3699 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3700 paUsers[i].iNext = pPool->iUserFreeHead;
3701 pPool->iUserFreeHead = i;
3702
3703 /* Next. */
3704 i = iNext;
3705 }
3706 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3707}
3708
3709
3710/**
3711 * Allocates a new physical cross reference extent.
3712 *
3713 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3714 * @param pVM The VM handle.
3715 * @param piPhysExt Where to store the phys ext index.
3716 */
3717PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3718{
3719 Assert(PGMIsLockOwner(pVM));
3720 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3721 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3722 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3723 {
3724 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3725 return NULL;
3726 }
3727 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3728 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3729 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3730 *piPhysExt = iPhysExt;
3731 return pPhysExt;
3732}
3733
3734
3735/**
3736 * Frees a physical cross reference extent.
3737 *
3738 * @param pVM The VM handle.
3739 * @param iPhysExt The extent to free.
3740 */
3741void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3742{
3743 Assert(PGMIsLockOwner(pVM));
3744 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3745 Assert(iPhysExt < pPool->cMaxPhysExts);
3746 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3747 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3748 {
3749 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3750 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3751 }
3752 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3753 pPool->iPhysExtFreeHead = iPhysExt;
3754}
3755
3756
3757/**
3758 * Frees a list of physical cross reference extents.
3759 *
3760 * @param pVM The VM handle.
3761 * @param iPhysExt The index of the head of the extent list to free.
3762 */
3763void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3764{
3765 Assert(PGMIsLockOwner(pVM));
3766 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3767
3768 const uint16_t iPhysExtStart = iPhysExt;
3769 PPGMPOOLPHYSEXT pPhysExt;
3770 do
3771 {
3772 Assert(iPhysExt < pPool->cMaxPhysExts);
3773 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3774 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3775 {
3776 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3777 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3778 }
3779
3780 /* next */
3781 iPhysExt = pPhysExt->iNext;
3782 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3783
3784 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3785 pPool->iPhysExtFreeHead = iPhysExtStart;
3786}
3787
3788
3789/**
3790 * Insert a reference into a list of physical cross reference extents.
3791 *
3792 * @returns The new tracking data for PGMPAGE.
3793 *
3794 * @param pVM The VM handle.
3795 * @param iPhysExt The physical extent index of the list head.
3796 * @param iShwPT The shadow page table index.
3797 * @param iPte Page table entry
3798 *
3799 */
3800static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3801{
3802 Assert(PGMIsLockOwner(pVM));
3803 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3804 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3805
3806 /* special common case. */
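 /* (Slots 0 and 1 are filled when pgmPoolTrackPhysExtAddref creates the list, so slot 2 is typically the first free one.) */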
3807 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3808 {
3809 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3810 paPhysExts[iPhysExt].apte[2] = iPte;
3811 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3812 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3813 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3814 }
3815
3816 /* general treatment. */
3817 const uint16_t iPhysExtStart = iPhysExt;
3818 unsigned cMax = 15;
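 /* cMax bounds the search; once it is exhausted we give up and fall back to the PGMPOOL_TD_IDX_OVERFLOWED tracking state. */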
3819 for (;;)
3820 {
3821 Assert(iPhysExt < pPool->cMaxPhysExts);
3822 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3823 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3824 {
3825 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3826 paPhysExts[iPhysExt].apte[i] = iPte;
3827 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3828 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3829 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3830 }
3831 if (!--cMax)
3832 {
3833 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
3834 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3835 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3836 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3837 }
 /* Advance to the next extent in the list; when we hit the end, break out so a new extent can be appended below. */
 if (paPhysExts[iPhysExt].iNext == NIL_PGMPOOL_PHYSEXT_INDEX)
 break;
 iPhysExt = paPhysExts[iPhysExt].iNext;
3838 }
3839
3840 /* add another extent to the list. */
3841 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3842 if (!pNew)
3843 {
3844 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
3845 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3846 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3847 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3848 }
3849 pNew->iNext = iPhysExtStart;
3850 pNew->aidx[0] = iShwPT;
3851 pNew->apte[0] = iPte;
3852 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
3853 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3854}
3855
3856
3857/**
3858 * Add a reference to guest physical page where extents are in use.
3859 *
3860 * @returns The new tracking data for PGMPAGE.
3861 *
3862 * @param pVM The VM handle.
3863 * @param pPhysPage Pointer to the aPages entry in the ram range.
3864 * @param u16 The ram range flags (top 16-bits).
3865 * @param iShwPT The shadow page table index.
3866 * @param iPte Page table entry
3867 */
3868uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
3869{
3870 pgmLock(pVM);
3871 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3872 {
3873 /*
3874 * Convert to extent list.
3875 */
3876 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3877 uint16_t iPhysExt;
3878 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3879 if (pPhysExt)
3880 {
3881 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3882 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
3883 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3884 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
3885 pPhysExt->aidx[1] = iShwPT;
3886 pPhysExt->apte[1] = iPte;
3887 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3888 }
3889 else
3890 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3891 }
3892 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3893 {
3894 /*
3895 * Insert into the extent list.
3896 */
3897 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
3898 }
3899 else
3900 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
3901 pgmUnlock(pVM);
3902 return u16;
3903}
3904
3905/**
3906 * Clear references to guest physical memory.
3907 *
3908 * @param pPool The pool.
3909 * @param pPage The page.
3910 * @param pPhysPage Pointer to the aPages entry in the ram range.
3911 * @param iPte Shadow PTE index
3912 */
3913void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
3914{
3915 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3916 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3917
3918 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3919 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3920 {
3921 PVM pVM = pPool->CTX_SUFF(pVM);
3922 pgmLock(pVM);
3923
3924 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3925 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
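 /* Walk the extent list, remembering the previous node so the one we empty can be unlinked
    (or the whole tracking cleared if it was the only one). */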
3926 do
3927 {
3928 Assert(iPhysExt < pPool->cMaxPhysExts);
3929
3930 /*
3931 * Look for the shadow page and check if it's all freed.
3932 */
3933 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3934 {
3935 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
3936 && paPhysExts[iPhysExt].apte[i] == iPte)
3937 {
3938 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3939 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3940
3941 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3942 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3943 {
3944 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3945 pgmUnlock(pVM);
3946 return;
3947 }
3948
3949 /* we can free the node. */
3950 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3951 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3952 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3953 {
3954 /* lonely node */
3955 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3956 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3957 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3958 }
3959 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3960 {
3961 /* head */
3962 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3963 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3964 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3965 }
3966 else
3967 {
3968 /* in list */
3969 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
3970 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3971 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3972 }
3973 iPhysExt = iPhysExtNext;
3974 pgmUnlock(pVM);
3975 return;
3976 }
3977 }
3978
3979 /* next */
3980 iPhysExtPrev = iPhysExt;
3981 iPhysExt = paPhysExts[iPhysExt].iNext;
3982 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3983
3984 pgmUnlock(pVM);
3985 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3986 }
3987 else /* nothing to do */
3988 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3989}
3990
3991/**
3992 * Clear references to guest physical memory.
3993 *
3994 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3995 * is assumed to be correct, so the linear search can be skipped and we can assert
3996 * at an earlier point.
3997 *
3998 * @param pPool The pool.
3999 * @param pPage The page.
4000 * @param HCPhys The host physical address corresponding to the guest page.
4001 * @param GCPhys The guest physical address corresponding to HCPhys.
4002 * @param iPte Shadow PTE index
4003 */
4004static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4005{
4006 /*
4007 * Walk range list.
4008 */
4009 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4010 while (pRam)
4011 {
4012 RTGCPHYS off = GCPhys - pRam->GCPhys;
4013 if (off < pRam->cb)
4014 {
4015 /* does it match? */
4016 const unsigned iPage = off >> PAGE_SHIFT;
4017 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4018#ifdef LOG_ENABLED
4019 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
4020 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4021#endif
4022 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4023 {
4024 Assert(pPage->cPresent);
4025 Assert(pPool->cPresent);
4026 pPage->cPresent--;
4027 pPool->cPresent--;
4028 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4029 return;
4030 }
4031 break;
4032 }
4033 pRam = pRam->CTX_SUFF(pNext);
4034 }
4035 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4036}
4037
4038
4039/**
4040 * Clear references to guest physical memory.
4041 *
4042 * @param pPool The pool.
4043 * @param pPage The page.
4044 * @param HCPhys The host physical address corresponding to the guest page.
4045 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4046 * @param iPte Shadow pte index
4047 */
4048void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4049{
4050 RTHCPHYS HCPhysExpected = 0xDEADBEEFDEADBEEFULL;
4051
4052 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4053
4054 /*
4055 * Walk range list.
4056 */
4057 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4058 while (pRam)
4059 {
4060 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
4061 if (off < pRam->cb)
4062 {
4063 /* does it match? */
4064 const unsigned iPage = off >> PAGE_SHIFT;
4065 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4066 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4067 {
4068 Assert(pPage->cPresent);
4069 Assert(pPool->cPresent);
4070 pPage->cPresent--;
4071 pPool->cPresent--;
4072 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4073 return;
4074 }
4075 HCPhysExpected = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
4076 break;
4077 }
4078 pRam = pRam->CTX_SUFF(pNext);
4079 }
4080
4081 /*
4082 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4083 */
4084 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4085 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4086 while (pRam)
4087 {
4088 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4089 while (iPage-- > 0)
4090 {
4091 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4092 {
4093 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4094 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4095 Assert(pPage->cPresent);
4096 Assert(pPool->cPresent);
4097 pPage->cPresent--;
4098 pPool->cPresent--;
4099 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4100 return;
4101 }
4102 }
4103 pRam = pRam->CTX_SUFF(pNext);
4104 }
4105
4106 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Expected HCPhys with hint = %RHp)\n", HCPhys, GCPhysHint, HCPhysExpected));
4107}
4108
4109
4110/**
4111 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4112 *
4113 * @param pPool The pool.
4114 * @param pPage The page.
4115 * @param pShwPT The shadow page table (mapping of the page).
4116 * @param pGstPT The guest page table.
4117 */
4118DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4119{
4120 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4121 if (pShwPT->a[i].n.u1Present)
4122 {
4123 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4124 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4125 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4126 if (!pPage->cPresent)
4127 break;
4128 }
4129}
4130
4131
4132/**
4133 * Clear references to guest physical memory in a PAE / 32-bit page table.
4134 *
4135 * @param pPool The pool.
4136 * @param pPage The page.
4137 * @param pShwPT The shadow page table (mapping of the page).
4138 * @param pGstPT The guest page table (just a half one).
4139 */
4140DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
4141{
4142 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4143 if (PGM_POOL_IS_PAE_PTE_PRESENT(pShwPT->a[i]))
4144 {
4145 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4146 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4147 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4148 if (!pPage->cPresent)
4149 break;
4150 }
4151}
4152
4153
4154/**
4155 * Clear references to guest physical memory in a PAE / PAE page table.
4156 *
4157 * @param pPool The pool.
4158 * @param pPage The page.
4159 * @param pShwPT The shadow page table (mapping of the page).
4160 * @param pGstPT The guest page table.
4161 */
4162DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
4163{
4164 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4165 if (PGM_POOL_IS_PAE_PTE_PRESENT(pShwPT->a[i]))
4166 {
4167 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4168 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4169 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
4170 if (!pPage->cPresent)
4171 break;
4172 }
4173}
4174
4175
4176/**
4177 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4178 *
4179 * @param pPool The pool.
4180 * @param pPage The page.
4181 * @param pShwPT The shadow page table (mapping of the page).
4182 */
4183DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4184{
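 /* There is no guest page table to consult for 4 MB mappings, so the guest address of each entry is derived
    from the shadow page's base GCPhys. */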
4185 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4186 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4187 if (pShwPT->a[i].n.u1Present)
4188 {
4189 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4190 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4191 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys, i);
4192 if (!pPage->cPresent)
4193 break;
4194 }
4195}
4196
4197
4198/**
4199 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4200 *
4201 * @param pPool The pool.
4202 * @param pPage The page.
4203 * @param pShwPT The shadow page table (mapping of the page).
4204 */
4205DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
4206{
4207 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4208 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4209 if (PGM_POOL_IS_PAE_PTE_PRESENT(pShwPT->a[i]))
4210 {
4211 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4212 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
4213 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys, i);
4214 if (!pPage->cPresent)
4215 break;
4216 }
4217}
4218
4219
4220/**
4221 * Clear references to shadowed pages in an EPT page table.
4222 *
4223 * @param pPool The pool.
4224 * @param pPage The page.
4225 * @param pShwPT The shadow page table (mapping of the page).
4226 */
4227DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4228{
4229 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4230 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4231 if (PGM_POOL_IS_EPT_PTE_PRESENT(pShwPT->a[i]))
4232 {
4233 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4234 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4235 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys, i);
4236 if (!pPage->cPresent)
4237 break;
4238 }
4239}
4240
4241
4242
4243/**
4244 * Clear references to shadowed pages in a 32-bit page directory.
4245 *
4246 * @param pPool The pool.
4247 * @param pPage The page.
4248 * @param pShwPD The shadow page directory (mapping of the page).
4249 */
4250DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4251{
4252 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4253 {
4254 if ( pShwPD->a[i].n.u1Present
4255 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4256 )
4257 {
4258 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4259 if (pSubPage)
4260 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4261 else
4262 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4263 }
4264 }
4265}
4266
4267/**
4268 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4269 *
4270 * @param pPool The pool.
4271 * @param pPage The page.
4272 * @param pShwPD The shadow page directory (mapping of the page).
4273 */
4274DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4275{
4276 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4277 {
4278 if ( pShwPD->a[i].n.u1Present
4279 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4280 )
4281 {
4282#ifdef PGM_WITH_LARGE_PAGES
4283 if (pShwPD->a[i].b.u1Size)
4284 {
4285 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4286 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4287 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4288 }
4289 else
4290#endif
4291 {
4292 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4293 if (pSubPage)
4294 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4295 else
4296 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4297 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4298 }
4299 }
4300 }
4301}
4302
4303/**
4304 * Clear references to shadowed pages in a PAE page directory pointer table.
4305 *
4306 * @param pPool The pool.
4307 * @param pPage The page.
4308 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4309 */
4310DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4311{
4312 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4313 {
4314 if ( pShwPDPT->a[i].n.u1Present
4315 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4316 )
4317 {
4318 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4319 if (pSubPage)
4320 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4321 else
4322 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4323 }
4324 }
4325}
4326
4327
4328/**
4329 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4330 *
4331 * @param pPool The pool.
4332 * @param pPage The page.
4333 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4334 */
4335DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4336{
4337 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4338 {
4339 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4340 if (pShwPDPT->a[i].n.u1Present)
4341 {
4342 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4343 if (pSubPage)
4344 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4345 else
4346 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4347 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4348 }
4349 }
4350}
4351
4352
4353/**
4354 * Clear references to shadowed pages in a 64-bit level 4 page table.
4355 *
4356 * @param pPool The pool.
4357 * @param pPage The page.
4358 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4359 */
4360DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4361{
4362 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4363 {
4364 if (pShwPML4->a[i].n.u1Present)
4365 {
4366 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4367 if (pSubPage)
4368 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4369 else
4370 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4371 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4372 }
4373 }
4374}
4375
4376
4377/**
4378 * Clear references to shadowed pages in an EPT page directory.
4379 *
4380 * @param pPool The pool.
4381 * @param pPage The page.
4382 * @param pShwPD The shadow page directory (mapping of the page).
4383 */
4384DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4385{
4386 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4387 {
4388 if (pShwPD->a[i].n.u1Present)
4389 {
4390#ifdef PGM_WITH_LARGE_PAGES
4391 if (pShwPD->a[i].b.u1Size)
4392 {
4393 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4394 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4395 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4396 }
4397 else
4398#endif
4399 {
4400 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4401 if (pSubPage)
4402 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4403 else
4404 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4405 }
4406 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4407 }
4408 }
4409}
4410
4411
4412/**
4413 * Clear references to shadowed pages in an EPT page directory pointer table.
4414 *
4415 * @param pPool The pool.
4416 * @param pPage The page.
4417 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4418 */
4419DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4420{
4421 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4422 {
4423 if (pShwPDPT->a[i].n.u1Present)
4424 {
4425 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4426 if (pSubPage)
4427 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4428 else
4429 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4430 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4431 }
4432 }
4433}
4434
4435
4436/**
4437 * Clears all references made by this page.
4438 *
4439 * This includes other shadow pages and GC physical addresses.
4440 *
4441 * @param pPool The pool.
4442 * @param pPage The page.
4443 */
4444static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4445{
4446 /*
4447 * Map the shadow page and take action according to the page kind.
4448 */
4449 PVM pVM = pPool->CTX_SUFF(pVM);
4450 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4451 switch (pPage->enmKind)
4452 {
4453 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4454 {
4455 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4456 void *pvGst;
4457 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4458 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4459 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4460 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4461 break;
4462 }
4463
4464 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4465 {
4466 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4467 void *pvGst;
4468 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4469 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4470 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4471 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4472 break;
4473 }
4474
4475 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4476 {
4477 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4478 void *pvGst;
4479 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4480 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4481 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4482 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4483 break;
4484 }
4485
4486 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4487 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4488 {
4489 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4490 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4491 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4492 break;
4493 }
4494
4495 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4496 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4497 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4498 {
4499 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4500 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4501 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4502 break;
4503 }
4504
4505 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4506 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4507 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4508 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4509 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4510 case PGMPOOLKIND_PAE_PD_PHYS:
4511 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4512 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4513 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4514 break;
4515
4516 case PGMPOOLKIND_32BIT_PD_PHYS:
4517 case PGMPOOLKIND_32BIT_PD:
4518 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4519 break;
4520
4521 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4522 case PGMPOOLKIND_PAE_PDPT:
4523 case PGMPOOLKIND_PAE_PDPT_PHYS:
4524 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4525 break;
4526
4527 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4528 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4529 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4530 break;
4531
4532 case PGMPOOLKIND_64BIT_PML4:
4533 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4534 break;
4535
4536 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4537 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4538 break;
4539
4540 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4541 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4542 break;
4543
4544 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4545 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4546 break;
4547
4548 default:
4549 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4550 }
4551
4552 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4553 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4554 ASMMemZeroPage(pvShw);
4555 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4556 pPage->fZeroed = true;
4557 Assert(!pPage->cPresent);
4558 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4559}
4560
4561/**
4562 * Flushes a pool page.
4563 *
4564 * This moves the page to the free list after removing all user references to it.
4565 *
4566 * @returns VBox status code.
4567 * @retval VINF_SUCCESS on success.
4568 * @param pPool The pool.
4569 * @param pPage The shadow pool page to flush.
4570 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4571 */
4572int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4573{
4574 PVM pVM = pPool->CTX_SUFF(pVM);
4575 bool fFlushRequired = false;
4576
4577 int rc = VINF_SUCCESS;
4578 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4579 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4580 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4581
4582 /*
4583 * Quietly reject any attempts at flushing any of the special root pages.
4584 */
4585 if (pPage->idx < PGMPOOL_IDX_FIRST)
4586 {
4587 AssertFailed(); /* can no longer happen */
4588 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4589 return VINF_SUCCESS;
4590 }
4591
4592 pgmLock(pVM);
4593
4594 /*
4595 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4596 */
4597 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4598 {
4599 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4600 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4601 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4602 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4603 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4604 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4605 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4606 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4607 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4608 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4609 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4610 pgmUnlock(pVM);
4611 return VINF_SUCCESS;
4612 }
4613
4614#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4615 /* Start a subset so we won't run out of mapping space. */
4616 PVMCPU pVCpu = VMMGetCpu(pVM);
4617 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4618#endif
4619
4620 /*
4621 * Mark the page as being in need of an ASMMemZeroPage().
4622 */
4623 pPage->fZeroed = false;
4624
4625#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4626 if (pPage->fDirty)
4627 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4628#endif
4629
4630 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4631 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4632 fFlushRequired = true;
4633
4634 /*
4635 * Clear the page.
4636 */
4637 pgmPoolTrackClearPageUsers(pPool, pPage);
4638 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4639 pgmPoolTrackDeref(pPool, pPage);
4640 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4641
4642 /*
4643 * Flush it from the cache.
4644 */
4645 pgmPoolCacheFlushPage(pPool, pPage);
4646
4647#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4648 /* Heavy stuff done. */
4649 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4650#endif
4651
4652 /*
4653 * Deregister the monitoring.
4654 */
4655 if (pPage->fMonitored)
4656 rc = pgmPoolMonitorFlush(pPool, pPage);
4657
4658 /*
4659 * Free the page.
4660 */
4661 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4662 pPage->iNext = pPool->iFreeHead;
4663 pPool->iFreeHead = pPage->idx;
4664 pPage->enmKind = PGMPOOLKIND_FREE;
4665 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4666 pPage->GCPhys = NIL_RTGCPHYS;
4667 pPage->fReusedFlushPending = false;
4668
4669 pPool->cUsedPages--;
4670
4671 /* Flush the TLBs of all VCPUs if required. */
4672 if ( fFlushRequired
4673 && fFlush)
4674 {
4675 PGM_INVL_ALL_VCPU_TLBS(pVM);
4676 }
4677
4678 pgmUnlock(pVM);
4679 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4680 return rc;
4681}
4682
4683
4684/**
4685 * Frees a usage of a pool page.
4686 *
4687 * The caller is responsible for updating the user table so that it no longer
4688 * references the shadow page.
4689 *
4690 * @param pPool The pool.
4691 * @param pPage The shadow page.
4692 * @param iUser The shadow page pool index of the user table.
4693 * @param iUserTable The index into the user table (shadowed).
4694 */
4695void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4696{
4697 PVM pVM = pPool->CTX_SUFF(pVM);
4698
4699 STAM_PROFILE_START(&pPool->StatFree, a);
4700 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4701 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4702 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4703 pgmLock(pVM);
4704 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4705 if (!pPage->fCached)
4706 pgmPoolFlushPage(pPool, pPage);
4707 pgmUnlock(pVM);
4708 STAM_PROFILE_STOP(&pPool->StatFree, a);
4709}
4710
4711
4712/**
4713 * Frees up one or more pages so the pool has a free page available.
4714 *
4715 * @returns VBox status code.
4716 * @retval VINF_SUCCESS on success.
4717 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4718 *
4719 * @param pPool The pool.
4720 * @param enmKind Page table kind
4721 * @param iUser The user of the page.
4722 */
4723static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4724{
4725 PVM pVM = pPool->CTX_SUFF(pVM);
4726
4727 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4728
4729 /*
4730 * If the pool isn't fully grown yet, expand it.
4731 */
4732 if ( pPool->cCurPages < pPool->cMaxPages
4733#if defined(IN_RC)
4734 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4735 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4736 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4737#endif
4738 )
4739 {
4740 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4741#ifdef IN_RING3
4742 int rc = PGMR3PoolGrow(pVM);
4743#else
4744 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4745#endif
4746 if (RT_FAILURE(rc))
4747 return rc;
4748 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4749 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4750 return VINF_SUCCESS;
4751 }
4752
4753 /*
4754 * Free one cached page.
4755 */
4756 return pgmPoolCacheFreeOne(pPool, iUser);
4757}
4758
4759/**
4760 * Allocates a page from the pool.
4761 *
4762 * This page may actually be a cached page and not in need of any processing
4763 * on the caller's part.
4764 *
4765 * @returns VBox status code.
4766 * @retval VINF_SUCCESS if a NEW page was allocated.
4767 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4768 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4769 * @param pVM The VM handle.
4770 * @param GCPhys The GC physical address of the page we're gonna shadow.
4771 * For 4MB and 2MB PD entries, it's the first address the
4772 * shadow PT is covering.
4773 * @param enmKind The kind of mapping.
4774 * @param enmAccess Access type for the mapping (only relevant for big pages)
4775 * @param iUser The shadow page pool index of the user table.
4776 * @param iUserTable The index into the user table (shadowed).
4777 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4778 * @param fLockPage Lock the page
4779 */
4780int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4781{
4782 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4783 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4784 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4785 *ppPage = NULL;
4786 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4787 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4788 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4789
4790 pgmLock(pVM);
4791
4792 if (pPool->fCacheEnabled)
4793 {
4794 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4795 if (RT_SUCCESS(rc2))
4796 {
4797 if (fLockPage)
4798 pgmPoolLockPage(pPool, *ppPage);
4799 pgmUnlock(pVM);
4800 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4801 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4802 return rc2;
4803 }
4804 }
4805
4806 /*
4807 * Allocate a new one.
4808 */
4809 int rc = VINF_SUCCESS;
4810 uint16_t iNew = pPool->iFreeHead;
4811 if (iNew == NIL_PGMPOOL_IDX)
4812 {
4813 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4814 if (RT_FAILURE(rc))
4815 {
4816 pgmUnlock(pVM);
4817 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4818 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4819 return rc;
4820 }
4821 iNew = pPool->iFreeHead;
4822 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4823 }
4824
4825 /* unlink the free head */
4826 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4827 pPool->iFreeHead = pPage->iNext;
4828 pPage->iNext = NIL_PGMPOOL_IDX;
4829
4830 /*
4831 * Initialize it.
4832 */
4833 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4834 pPage->enmKind = enmKind;
4835 pPage->enmAccess = enmAccess;
4836 pPage->GCPhys = GCPhys;
4837 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4838 pPage->fMonitored = false;
4839 pPage->fCached = false;
4840#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4841 pPage->fDirty = false;
4842#endif
4843 pPage->fReusedFlushPending = false;
4844 pPage->cModifications = 0;
4845 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4846 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4847 pPage->cLocked = 0;
4848 pPage->cPresent = 0;
4849 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4850 pPage->pvLastAccessHandlerFault = 0;
4851 pPage->cLastAccessHandlerCount = 0;
4852 pPage->pvLastAccessHandlerRip = 0;
4853
4854 /*
4855 * Insert into the tracking and cache. If this fails, free the page.
4856 */
4857 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4858 if (RT_FAILURE(rc3))
4859 {
4860 pPool->cUsedPages--;
4861 pPage->enmKind = PGMPOOLKIND_FREE;
4862 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4863 pPage->GCPhys = NIL_RTGCPHYS;
4864 pPage->iNext = pPool->iFreeHead;
4865 pPool->iFreeHead = pPage->idx;
4866 pgmUnlock(pVM);
4867 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4868 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4869 return rc3;
4870 }
4871
4872 /*
4873 * Commit the allocation, clear the page and return.
4874 */
4875#ifdef VBOX_WITH_STATISTICS
4876 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4877 pPool->cUsedPagesHigh = pPool->cUsedPages;
4878#endif
4879
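    /* Only scrub the page if its content isn't already known to be zero. */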
4880 if (!pPage->fZeroed)
4881 {
4882 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4883 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4884 ASMMemZeroPage(pv);
4885 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4886 }
4887
4888 *ppPage = pPage;
4889 if (fLockPage)
4890 pgmPoolLockPage(pPool, pPage);
4891 pgmUnlock(pVM);
4892 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4893 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4894 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4895 return rc;
4896}
4897
4898
4899/**
4900 * Frees a usage of a pool page.
4901 *
4902 * @param pVM The VM handle.
4903 * @param HCPhys The HC physical address of the shadow page.
4904 * @param iUser The shadow page pool index of the user table.
4905 * @param iUserTable The index into the user table (shadowed).
4906 */
4907void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4908{
4909 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4910 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4911 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4912}
4913
4914/**
4915 * Internal worker for finding an in-use shadow page by its physical address.
4916 *
4917 * @returns Pointer to the shadow page structure.
4918 * @param pPool The pool.
4919 * @param HCPhys The HC physical address of the shadow page.
4920 */
4921PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4922{
4923 PVM pVM = pPool->CTX_SUFF(pVM);
4924
4925 Assert(PGMIsLockOwner(pVM));
4926
4927 /*
4928 * Look up the page.
4929 */
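    /* The tree is keyed on the page-aligned physical address, so mask off the low bits of HCPhys. */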
4930 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4931
4932 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4933 return pPage;
4934}
4935
4936#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4937/**
4938 * Flushes the specified page if present.
4939 *
4940 * @param pVM The VM handle.
4941 * @param GCPhys Guest physical address of the page to flush.
4942 */
4943void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4944{
4945 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4946
4947 VM_ASSERT_EMT(pVM);
4948
4949 /*
4950 * Look up the GCPhys in the hash.
4951 */
4952 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4953 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4954 if (i == NIL_PGMPOOL_IDX)
4955 return;
4956
4957 do
4958 {
4959 PPGMPOOLPAGE pPage = &pPool->aPages[i];
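        /* Unsigned compare: true exactly when pPage->GCPhys lies within the page starting at GCPhys. */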
4960 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4961 {
4962 switch (pPage->enmKind)
4963 {
4964 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4965 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4966 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4967 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4968 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4969 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4970 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4971 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4972 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4973 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4974 case PGMPOOLKIND_64BIT_PML4:
4975 case PGMPOOLKIND_32BIT_PD:
4976 case PGMPOOLKIND_PAE_PDPT:
4977 {
4978 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4979#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4980 if (pPage->fDirty)
4981 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4982 else
4983#endif
4984 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4985 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4986 pgmPoolMonitorChainFlush(pPool, pPage);
4987 return;
4988 }
4989
4990 /* ignore, no monitoring. */
4991 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4992 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4993 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4994 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4995 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4996 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4997 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4998 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4999 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5000 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5001 case PGMPOOLKIND_ROOT_NESTED:
5002 case PGMPOOLKIND_PAE_PD_PHYS:
5003 case PGMPOOLKIND_PAE_PDPT_PHYS:
5004 case PGMPOOLKIND_32BIT_PD_PHYS:
5005 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5006 break;
5007
5008 default:
5009 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5010 }
5011 }
5012
5013 /* next */
5014 i = pPage->iNext;
5015 } while (i != NIL_PGMPOOL_IDX);
5016 return;
5017}
5018#endif /* IN_RING3 */
5019
5020#ifdef IN_RING3
5021
5022
5023/**
5024 * Reset CPU on hot plugging.
5025 *
5026 * @param pVM The VM handle.
5027 * @param pVCpu The virtual CPU.
5028 */
5029void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5030{
5031 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5032
5033 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5034 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5035 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5036}
5037
5038
5039/**
5040 * Flushes the entire shadow page pool.
5041 *
5042 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5043 * this and will execute the CR3 flush.
5044 *
5045 * @param pVM The VM handle.
5046 */
5047void pgmR3PoolReset(PVM pVM)
5048{
5049 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5050
5051 Assert(PGMIsLockOwner(pVM));
5052 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5053 LogFlow(("pgmR3PoolReset:\n"));
5054
5055 /*
5056 * If there are no pages in the pool, there is nothing to do.
5057 */
5058 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5059 {
5060 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5061 return;
5062 }
5063
5064 /*
5065 * Exit the shadow mode since we're going to clear everything,
5066 * including the root page.
5067 */
5068 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5069 {
5070 PVMCPU pVCpu = &pVM->aCpus[i];
5071 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5072 }
5073
5074 /*
5075 * Nuke the free list and reinsert all pages into it.
5076 */
5077 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5078 {
5079 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5080
5081 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5082 if (pPage->fMonitored)
5083 pgmPoolMonitorFlush(pPool, pPage);
5084 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5085 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5086 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5087 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5088 pPage->cModifications = 0;
5089 pPage->GCPhys = NIL_RTGCPHYS;
5090 pPage->enmKind = PGMPOOLKIND_FREE;
5091 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5092 Assert(pPage->idx == i);
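        /* Chain to the next page; the final link is corrected to NIL after the loop. */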
5093 pPage->iNext = i + 1;
5094 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5095 pPage->fSeenNonGlobal = false;
5096 pPage->fMonitored = false;
5097#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5098 pPage->fDirty = false;
5099#endif
5100 pPage->fCached = false;
5101 pPage->fReusedFlushPending = false;
5102 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5103 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5104 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5105 pPage->cLocked = 0;
5106 }
5107 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5108 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5109 pPool->cUsedPages = 0;
5110
5111 /*
5112 * Zap and reinitialize the user records.
5113 */
5114 pPool->cPresent = 0;
5115 pPool->iUserFreeHead = 0;
5116 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5117 const unsigned cMaxUsers = pPool->cMaxUsers;
5118 for (unsigned i = 0; i < cMaxUsers; i++)
5119 {
5120 paUsers[i].iNext = i + 1;
5121 paUsers[i].iUser = NIL_PGMPOOL_IDX;
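        /* Deliberately invalid table index, presumably to make stale entries easy to spot. */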
5122 paUsers[i].iUserTable = 0xfffffffe;
5123 }
5124 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5125
5126 /*
5127 * Clear all the GCPhys links and rebuild the phys ext free list.
5128 */
5129 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
5130 pRam;
5131 pRam = pRam->CTX_SUFF(pNext))
5132 {
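        /* Clear the pool tracking data of every guest page in this RAM range. */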
5133 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5134 while (iPage-- > 0)
5135 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
5136 }
5137
5138 pPool->iPhysExtFreeHead = 0;
5139 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5140 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5141 for (unsigned i = 0; i < cMaxPhysExts; i++)
5142 {
5143 paPhysExts[i].iNext = i + 1;
5144 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5145 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5146 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5147 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5148 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5149 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5150 }
5151 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5152
5153 /*
5154 * Just zap the modified list.
5155 */
5156 pPool->cModifiedPages = 0;
5157 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5158
5159 /*
5160 * Clear the GCPhys hash and the age list.
5161 */
5162 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5163 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5164 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5165 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5166
5167#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5168 /* Clear all dirty pages. */
5169 pPool->idxFreeDirtyPage = 0;
5170 pPool->cDirtyPages = 0;
5171 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
5172 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
5173#endif
5174
5175 /*
5176 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5177 */
5178 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5179 {
5180 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5181 pPage->iNext = NIL_PGMPOOL_IDX;
5182 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5183 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5184 pPage->cModifications = 0;
5185 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5186 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5187 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
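        /* Monitored pages keep their physical access handler; refresh its callbacks and put the page back into the hash. */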
5188 if (pPage->fMonitored)
5189 {
5190 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
5191 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5192 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5193 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5194 pPool->pszAccessHandler);
5195 AssertFatalRCSuccess(rc);
5196 pgmPoolHashInsert(pPool, pPage);
5197 }
5198 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5199 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5200 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5201 }
5202
5203 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5204 {
5205 /*
5206 * Re-enter the shadowing mode and assert Sync CR3 FF.
5207 */
5208 PVMCPU pVCpu = &pVM->aCpus[i];
5209 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5210 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5211 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5212 }
5213
5214 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5215}
5216#endif /* IN_RING3 */
5217
5218#ifdef LOG_ENABLED
5219static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5220{
5221 switch (enmKind)
5222 {
5223 case PGMPOOLKIND_INVALID:
5224 return "PGMPOOLKIND_INVALID";
5225 case PGMPOOLKIND_FREE:
5226 return "PGMPOOLKIND_FREE";
5227 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5228 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5229 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5230 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5231 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5232 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5233 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5234 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5235 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5236 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5237 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5238 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5239 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5240 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5241 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5242 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5243 case PGMPOOLKIND_32BIT_PD:
5244 return "PGMPOOLKIND_32BIT_PD";
5245 case PGMPOOLKIND_32BIT_PD_PHYS:
5246 return "PGMPOOLKIND_32BIT_PD_PHYS";
5247 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5248 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5249 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5250 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5251 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5252 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5253 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5254 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5255 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5256 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5257 case PGMPOOLKIND_PAE_PD_PHYS:
5258 return "PGMPOOLKIND_PAE_PD_PHYS";
5259 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5260 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5261 case PGMPOOLKIND_PAE_PDPT:
5262 return "PGMPOOLKIND_PAE_PDPT";
5263 case PGMPOOLKIND_PAE_PDPT_PHYS:
5264 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5265 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5266 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5267 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5268 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5269 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5270 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5271 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5272 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5273 case PGMPOOLKIND_64BIT_PML4:
5274 return "PGMPOOLKIND_64BIT_PML4";
5275 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5276 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5277 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5278 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5279 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5280 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5281 case PGMPOOLKIND_ROOT_NESTED:
5282 return "PGMPOOLKIND_ROOT_NESTED";
5283 }
5284 return "Unknown kind!";
5285}
5286#endif /* LOG_ENABLED */