VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@86488

Last change on this file since 86488 was 86488, checked in by vboxsync, 4 years ago

VMM/PGM: Working on eliminating page table bitfield use (32-bit PDEs). bugref:9841 bugref:9746

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 210.1 KB
1/* $Id: PGMAllPool.cpp 86488 2020-10-08 08:32:24Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
24#include <VBox/vmm/pgm.h>
25#include <VBox/vmm/mm.h>
26#include <VBox/vmm/em.h>
27#include <VBox/vmm/cpum.h>
28#include "PGMInternal.h"
29#include <VBox/vmm/vmcc.h>
30#include "PGMInline.h"
31#include <VBox/disopcode.h>
32#include <VBox/vmm/hm_vmx.h>
33
34#include <VBox/log.h>
35#include <VBox/err.h>
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/string.h>
39
40
41/*********************************************************************************************************************************
42* Internal Functions *
43*********************************************************************************************************************************/
44RT_C_DECLS_BEGIN
45#if 0 /* unused */
46DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
47DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
48#endif /* unused */
49static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
54static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
55#endif
56#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
57static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
58#endif
59
60int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
61PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
62void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
63void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
64
65RT_C_DECLS_END
66
67
68#if 0 /* unused */
69/**
70 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
71 *
72 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
73 * @param enmKind The page kind.
74 */
75DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
76{
77 switch (enmKind)
78 {
79 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
82 return true;
83 default:
84 return false;
85 }
86}
87#endif /* unused */
88
89
90/**
91 * Flushes a chain of pages sharing the same access monitor.
92 *
93 * @param pPool The pool.
94 * @param pPage A page in the chain.
95 */
96void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
97{
98 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
99
100 /*
101 * Find the list head.
102 */
103 uint16_t idx = pPage->idx;
104 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
105 {
106 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 idx = pPage->iMonitoredPrev;
109 Assert(idx != pPage->idx);
110 pPage = &pPool->aPages[idx];
111 }
112 }
113
114 /*
115 * Iterate the list flushing each shadow page.
116 */
117 for (;;)
118 {
119 idx = pPage->iMonitoredNext;
120 Assert(idx != pPage->idx);
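        /* Only regular pool pages are flushed here; indices below PGMPOOL_IDX_FIRST
           denote reserved entries (NIL and, presumably, the fixed root pages), which
           the check below deliberately skips. */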
121 if (pPage->idx >= PGMPOOL_IDX_FIRST)
122 {
123 int rc2 = pgmPoolFlushPage(pPool, pPage);
124 AssertRC(rc2);
125 }
126 /* next */
127 if (idx == NIL_PGMPOOL_IDX)
128 break;
129 pPage = &pPool->aPages[idx];
130 }
131}
132
133
134/**
135 * Wrapper for reading the guest entry being modified, using the mapping or the
136 * guest physical address depending on the current context.
136 *
137 * @returns VBox status code suitable for scheduling.
138 * @param pVM The cross context VM structure.
139 * @param pvDst Destination address
140 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
141 * on the context (e.g. \#PF in R0 & RC).
142 * @param GCPhysSrc The source guest physical address.
143 * @param cb Size of data to read
144 */
145DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
146{
147#if defined(IN_RING3)
148 NOREF(pVM); NOREF(GCPhysSrc);
149 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
150 return VINF_SUCCESS;
151#else
152 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
153 NOREF(pvSrc);
154 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
155#endif
156}
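/* Usage sketch (mirroring the 32-bit PT case further down): because the function masks
   the source with ~(cb - 1), a write that only touches part of an entry still reads back
   the whole, naturally aligned guest entry.  This assumes cb is a power of two (4 or 8
   bytes for x86 paging entries).

       X86PTE GstPte;
       int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
*/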
157
158
159/**
160 * Process shadow entries before they are changed by the guest.
161 *
162 * For PT entries we will clear them. For PD entries, we'll simply check
163 * for mapping conflicts and set the SyncCR3 FF if found.
164 *
165 * @param pVCpu The cross context virtual CPU structure.
166 * @param pPool The pool.
167 * @param pPage The head page.
168 * @param GCPhysFault The guest physical fault address.
169 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
170 * depending on the context (e.g. \#PF in R0 & RC).
171 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
172 */
173static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
174 void const *pvAddress, unsigned cbWrite)
175{
176 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
177 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
178 PVMCC pVM = pPool->CTX_SUFF(pVM);
179 NOREF(pVCpu);
180
181 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
182 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
183
184 for (;;)
185 {
186 union
187 {
188 void *pv;
189 PX86PT pPT;
190 PPGMSHWPTPAE pPTPae;
191 PX86PD pPD;
192 PX86PDPAE pPDPae;
193 PX86PDPT pPDPT;
194 PX86PML4 pPML4;
195 } uShw;
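        /* One mapping, many views: the union above lets the single shadow page mapping
           be interpreted as whichever paging structure pPage->enmKind says it is. */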
196
197 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
198 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
199
200 uShw.pv = NULL;
201 switch (pPage->enmKind)
202 {
203 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
204 {
205 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
206 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
207 const unsigned iShw = off / sizeof(X86PTE);
208 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
209 if (uShw.pPT->a[iShw].n.u1Present)
210 {
211 X86PTE GstPte;
212
213 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
214 AssertRC(rc);
215 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
216 pgmPoolTracDerefGCPhysHint(pPool, pPage,
217 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
218 GstPte.u & X86_PTE_PG_MASK,
219 iShw);
220 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
221 }
222 break;
223 }
224
225 /* page/2 sized */
226 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
227 {
228 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
229 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
230 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
231 {
232 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
233 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
234 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
235 {
236 X86PTE GstPte;
237 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
238 AssertRC(rc);
239
240 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
241 pgmPoolTracDerefGCPhysHint(pPool, pPage,
242 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
243 GstPte.u & X86_PTE_PG_MASK,
244 iShw);
245 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
246 }
247 }
248 break;
249 }
250
251 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
252 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
253 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
254 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
255 {
256 unsigned iGst = off / sizeof(X86PDE);
257 unsigned iShwPdpt = iGst / 256;
258 unsigned iShw = (iGst % 256) * 2;
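            /* Index math, informally: the 1024-entry 32-bit guest PD is shadowed by four
               512-entry PAE PDs, and each 4MB guest PDE expands to two 2MB PAE PDEs.
               Worked example: off=0x804 gives iGst=513, so iShwPdpt=2 (the third shadow
               PD) and iShw=2, with the loop below touching PAE entries 2 and 3. */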
259 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
260
261 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
262 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
263 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
264 {
265 for (unsigned i = 0; i < 2; i++)
266 {
267 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
268 if (uPde & X86_PDE_P)
269 {
270 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
271 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
272 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
273 }
274
275 /* paranoia / a bit assumptive. */
276 if ( (off & 3)
277 && (off & 3) + cbWrite > 4)
278 {
279 const unsigned iShw2 = iShw + 2 + i;
280 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
281 {
282 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
283 if (uPde2 & X86_PDE_P)
284 {
285 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
286 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
287 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
288 }
289 }
290 }
291 }
292 }
293 break;
294 }
295
296 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
297 {
298 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
299 const unsigned iShw = off / sizeof(X86PTEPAE);
300 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
301 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
302 {
303 X86PTEPAE GstPte;
304 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
305 AssertRC(rc);
306
307 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
308 pgmPoolTracDerefGCPhysHint(pPool, pPage,
309 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
310 GstPte.u & X86_PTE_PAE_PG_MASK,
311 iShw);
312 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
313 }
314
315 /* paranoia / a bit assumptive. */
316 if ( (off & 7)
317 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
318 {
319 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
320 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
321
322 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
323 {
324 X86PTEPAE GstPte;
325 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
326 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
327 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
328 AssertRC(rc);
329 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
330 pgmPoolTracDerefGCPhysHint(pPool, pPage,
331 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
332 GstPte.u & X86_PTE_PAE_PG_MASK,
333 iShw2);
334 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
335 }
336 }
337 break;
338 }
339
340 case PGMPOOLKIND_32BIT_PD:
341 {
342 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
343 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
344
345 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
346 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
347 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
348 if (uPde & X86_PDE_P)
349 {
350 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
351 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
352 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
353 }
354
355 /* paranoia / a bit assumptive. */
356 if ( (off & 3)
357 && (off & 3) + cbWrite > sizeof(X86PTE))
358 {
359 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
360 if ( iShw2 != iShw
361 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
362 {
363 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
364 if (uPde2 & X86_PDE_P)
365 {
366 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
367 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
368 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
369 }
370 }
371 }
372#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
373 if ( uShw.pPD->a[iShw].n.u1Present
374 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
375 {
376 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
377 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
378 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
379 }
380#endif
381 break;
382 }
383
384 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
385 {
386 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
387 const unsigned iShw = off / sizeof(X86PDEPAE);
388 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
389
390 /*
391 * Causes trouble when the guest uses a PDE to refer to the whole page table level
392 * structure. (Invalidate here; faults later on when it tries to change the page
393 * table entries -> recheck; probably only applies to the RC case.)
394 */
395 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
396 if (uPde & X86_PDE_P)
397 {
398 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
399 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
400 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
401 }
402
403 /* paranoia / a bit assumptive. */
404 if ( (off & 7)
405 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
406 {
407 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
408 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
409
410 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
411 if (uPde2 & X86_PDE_P)
412 {
413 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
414 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
415 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
416 }
417 }
418 break;
419 }
420
421 case PGMPOOLKIND_PAE_PDPT:
422 {
423 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
424 /*
425 * Hopefully this doesn't happen very often:
426 * - touching unused parts of the page
427 * - messing with the bits of pd pointers without changing the physical address
428 */
429 /* PDPT roots are not page aligned; 32 byte only! */
430 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
431
432 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
433 const unsigned iShw = offPdpt / sizeof(X86PDPE);
434 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
435 {
436 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
437 if (uPdpe & X86_PDPE_P)
438 {
439 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
440 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
441 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
442 }
443
444 /* paranoia / a bit assumptive. */
445 if ( (offPdpt & 7)
446 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
447 {
448 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
449 if ( iShw2 != iShw
450 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
451 {
452 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
453 if (uPdpe2 & X86_PDPE_P)
454 {
455 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
456 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
457 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
458 }
459 }
460 }
461 }
462 break;
463 }
464
465 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
466 {
467 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
468 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
469 const unsigned iShw = off / sizeof(X86PDEPAE);
470 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
471#ifndef PGM_WITHOUT_MAPPINGS
472 Assert(!(uPde & PGM_PDFLAGS_MAPPING));
473#endif
474 if (uPde & X86_PDE_P)
475 {
476 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
477 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
478 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
479 }
480
481 /* paranoia / a bit assumptive. */
482 if ( (off & 7)
483 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
484 {
485 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
486 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
487 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
488#ifndef PGM_WITHOUT_MAPPINGS
489 Assert(!(uPde2 & PGM_PDFLAGS_MAPPING));
490#endif
491 if (uPde2 & X86_PDE_P)
492 {
493 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
494 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
495 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
496 }
497 }
498 break;
499 }
500
501 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
502 {
503 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
504 /*
505 * Hopefully this doesn't happen very often:
506 * - messing with the bits of pd pointers without changing the physical address
507 */
508 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
509 const unsigned iShw = off / sizeof(X86PDPE);
510 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
511 if (uPdpe & X86_PDPE_P)
512 {
513 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
514 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
515 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
516 }
517 /* paranoia / a bit assumptive. */
518 if ( (off & 7)
519 && (off & 7) + cbWrite > sizeof(X86PDPE))
520 {
521 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
522 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
523 if (uPdpe2 & X86_PDPE_P)
524 {
525 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
526 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
527 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
528 }
529 }
530 break;
531 }
532
533 case PGMPOOLKIND_64BIT_PML4:
534 {
535 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
536 /*
537 * Hopefully this doesn't happen very often:
538 * - messing with the bits of pd pointers without changing the physical address
539 */
540 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
541 const unsigned iShw = off / sizeof(X86PDPE);
542 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
543 if (uPml4e & X86_PML4E_P)
544 {
545 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
546 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
547 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
548 }
549 /* paranoia / a bit assumptive. */
550 if ( (off & 7)
551 && (off & 7) + cbWrite > sizeof(X86PDPE))
552 {
553 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
554 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
555 if (uPml4e2 & X86_PML4E_P)
556 {
557 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
558 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
559 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
560 }
561 }
562 break;
563 }
564
565 default:
566 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
567 }
568 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
569
570 /* next */
571 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
572 return;
573 pPage = &pPool->aPages[pPage->iMonitoredNext];
574 }
575}
576
577#ifndef IN_RING3
578
579/**
580 * Checks if an access could be a fork operation in progress.
581 *
582 * Meaning that the guest is setting up the parent process for Copy-On-Write.
583 *
584 * @returns true if it's likely that we're forking, otherwise false.
585 * @param pPool The pool.
586 * @param pDis The disassembled instruction.
587 * @param offFault The access offset.
588 */
589DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
590{
591 /*
592 * i386 linux is using btr to clear X86_PTE_RW.
593 * The functions involved are (2.6.16 source inspection):
594 * clear_bit
595 * ptep_set_wrprotect
596 * copy_one_pte
597 * copy_pte_range
598 * copy_pmd_range
599 * copy_pud_range
600 * copy_page_range
601 * dup_mmap
602 * dup_mm
603 * copy_mm
604 * copy_process
605 * do_fork
606 */
607 if ( pDis->pCurInstr->uOpcode == OP_BTR
608 && !(offFault & 4)
609 /** @todo Validate that the bit index is X86_PTE_RW. */
610 )
611 {
612 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
613 return true;
614 }
615 return false;
616}
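/* For reference, the guest-side pattern this heuristic keys on would look roughly
   like the following (hypothetical compiler output for the Linux ptep_set_wrprotect
   path listed above):

       lock btr dword ptr [ptep], 1    ; bit 1 = X86_PTE_RW -> write protect the PTE

   i.e. an OP_BTR hitting the low dword of the entry (offFault & 4 == 0), which is
   why the exact bit index is not validated here (see the @todo above). */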
617
618
619/**
620 * Determine whether the page is likely to have been reused.
621 *
622 * @returns true if we consider the page as being reused for a different purpose.
623 * @returns false if we consider it to still be a paging page.
624 * @param pVM The cross context VM structure.
625 * @param pVCpu The cross context virtual CPU structure.
626 * @param pRegFrame Trap register frame.
627 * @param pDis The disassembly info for the faulting instruction.
628 * @param pvFault The fault address.
629 * @param pPage The pool page being accessed.
630 *
631 * @remark The REP prefix check is left to the caller because of STOSD/W.
632 */
633DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
634 PPGMPOOLPAGE pPage)
635{
636 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
637 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
638 if (pPage->cLocked)
639 {
640 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused, because it's locked!\n", pvFault, pPage));
641 return false;
642 }
643
644 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
645 if ( HMHasPendingIrq(pVM)
646 && pRegFrame->rsp - pvFault < 32)
647 {
648 /* Fault caused by stack writes while trying to inject an interrupt event. */
649 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
650 return true;
651 }
652
653 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
654
655 /* Non-supervisor mode write means it's used for something else. */
656 if (CPUMGetGuestCPL(pVCpu) == 3)
657 return true;
658
659 switch (pDis->pCurInstr->uOpcode)
660 {
661 /* call implies the actual push of the return address faulted */
662 case OP_CALL:
663 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
664 return true;
665 case OP_PUSH:
666 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
667 return true;
668 case OP_PUSHF:
669 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
670 return true;
671 case OP_PUSHA:
672 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
673 return true;
674 case OP_FXSAVE:
675 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
676 return true;
677 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
678 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
679 return true;
680 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
681 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
682 return true;
683 case OP_MOVSWD:
684 case OP_STOSWD:
685 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
686 && pRegFrame->rcx >= 0x40
687 )
688 {
689 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
690
691 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
692 return true;
693 }
694 break;
695
696 default:
697 /*
698 * Anything having ESP on the left side means stack writes.
699 */
700 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
701 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
702 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
703 {
704 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
705 return true;
706 }
707 break;
708 }
709
710 /*
711 * Page table updates are very, very unlikely to be crossing page boundaries,
712 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
713 */
714 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
715 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
716 {
717 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
718 return true;
719 }
720
721 /*
722 * Nobody does an unaligned 8-byte write to a page table, right?
723 */
724 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
725 {
726 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
727 return true;
728 }
729
730 return false;
731}
732
733
734/**
735 * Flushes the page being accessed.
736 *
737 * @returns VBox status code suitable for scheduling.
738 * @param pVM The cross context VM structure.
739 * @param pVCpu The cross context virtual CPU structure.
740 * @param pPool The pool.
741 * @param pPage The pool page (head).
742 * @param pDis The disassembly of the write instruction.
743 * @param pRegFrame The trap register frame.
744 * @param GCPhysFault The fault address as guest physical address.
745 * @param pvFault The fault address.
746 * @todo VBOXSTRICTRC
747 */
748static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
749 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
750{
751 NOREF(pVM); NOREF(GCPhysFault);
752
753 /*
754 * First, do the flushing.
755 */
756 pgmPoolMonitorChainFlush(pPool, pPage);
757
758 /*
759 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
760 * Must do this in raw mode (!); XP boot will fail otherwise.
761 */
762 int rc = VINF_SUCCESS;
763 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
764 if (rc2 == VINF_SUCCESS)
765 { /* do nothing */ }
766 else if (rc2 == VINF_EM_RESCHEDULE)
767 {
768 rc = VBOXSTRICTRC_VAL(rc2);
769# ifndef IN_RING3
770 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
771# endif
772 }
773 else if (rc2 == VERR_EM_INTERPRETER)
774 {
775 rc = VINF_EM_RAW_EMULATE_INSTR;
776 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
777 }
778 else if (RT_FAILURE_NP(rc2))
779 rc = VBOXSTRICTRC_VAL(rc2);
780 else
781 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
782
783 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
784 return rc;
785}
786
787
788/**
789 * Handles the STOSD write accesses.
790 *
791 * @returns VBox status code suitable for scheduling.
792 * @param pVM The cross context VM structure.
793 * @param pPool The pool.
794 * @param pPage The pool page (head).
795 * @param pDis The disassembly of the write instruction.
796 * @param pRegFrame The trap register frame.
797 * @param GCPhysFault The fault address as guest physical address.
798 * @param pvFault The fault address.
799 */
800DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
801 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
802{
803 unsigned uIncrement = pDis->Param1.cb;
804 NOREF(pVM);
805
806 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
807 Assert(pRegFrame->rcx <= 0x20);
808
809# ifdef VBOX_STRICT
810 if (pDis->uOpMode == DISCPUMODE_32BIT)
811 Assert(uIncrement == 4);
812 else
813 Assert(uIncrement == 8);
814# endif
815
816 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
817
818 /*
819 * Increment the modification counter and insert it into the list
820 * of modified pages the first time.
821 */
822 if (!pPage->cModifications++)
823 pgmPoolMonitorModifiedInsert(pPool, pPage);
824
825 /*
826 * Execute REP STOSD.
827 *
828 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
829 * write situation, meaning that it's safe to write here.
830 */
831 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
832 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
833 while (pRegFrame->rcx)
834 {
835# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
836 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
837 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
838 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
839# else
840 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
841# endif
842 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
843 pu32 += uIncrement;
844 GCPhysFault += uIncrement;
845 pRegFrame->rdi += uIncrement;
846 pRegFrame->rcx--;
847 }
848 pRegFrame->rip += pDis->cbInstr;
849
850 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
851 return VINF_SUCCESS;
852}
853
854
855/**
856 * Handles the simple write accesses.
857 *
858 * @returns VBox status code suitable for scheduling.
859 * @param pVM The cross context VM structure.
860 * @param pVCpu The cross context virtual CPU structure.
861 * @param pPool The pool.
862 * @param pPage The pool page (head).
863 * @param pDis The disassembly of the write instruction.
864 * @param pRegFrame The trap register frame.
865 * @param GCPhysFault The fault address as guest physical address.
866 * @param pvFault The fault address.
867 * @param pfReused Reused state (in/out)
868 */
869DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
870 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
871{
872 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
873 NOREF(pVM);
874 NOREF(pfReused); /* initialized by caller */
875
876 /*
877 * Increment the modification counter and insert it into the list
878 * of modified pages the first time.
879 */
880 if (!pPage->cModifications++)
881 pgmPoolMonitorModifiedInsert(pPool, pPage);
882
883 /*
884 * Clear the affected shadow page table entries. ASSUMES that pvFault is readable.
885 */
886# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
887 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
888# endif
889
890 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
891 if (cbWrite <= 8)
892 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
893 else if (cbWrite <= 16)
894 {
895 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
896 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
897 }
898 else
899 {
900 Assert(cbWrite <= 32);
901 for (uint32_t off = 0; off < cbWrite; off += 8)
902 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
903 }
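    /* The chunking above bounds each pgmPoolMonitorChainChanging() call to 8 bytes, so
       larger guest stores (up to the asserted 32 bytes, e.g. SSE/AVX moves) are handled
       as a sequence of entry-sized pieces rather than in one go. */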
904
905# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
906 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
907# endif
908
909 /*
910 * Interpret the instruction.
911 */
912 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
913 if (RT_SUCCESS(rc))
914 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
915 else if (rc == VERR_EM_INTERPRETER)
916 {
917 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
918 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
919 rc = VINF_EM_RAW_EMULATE_INSTR;
920 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
921 }
922
923# if 0 /* experimental code */
924 if (rc == VINF_SUCCESS)
925 {
926 switch (pPage->enmKind)
927 {
928 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
929 {
930 X86PTEPAE GstPte;
931 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
932 AssertRC(rc);
933
934 /* Check the new value written by the guest. If present and with a bogus physical address, then
935 * it's fairly safe to assume the guest is reusing the PT.
936 */
937 if (GstPte.n.u1Present)
938 {
939 RTHCPHYS HCPhys = -1;
940 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
941 if (rc != VINF_SUCCESS)
942 {
943 *pfReused = true;
944 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
945 }
946 }
947 break;
948 }
949 }
950 }
951# endif
952
953 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
954 return VBOXSTRICTRC_VAL(rc);
955}
956
957
958/**
959 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
960 * \#PF access handler callback for page table pages.}
961 *
962 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
963 */
964DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
965 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
966{
967 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
968 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
969 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
970 unsigned cMaxModifications;
971 bool fForcedFlush = false;
972 NOREF(uErrorCode);
973
974 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
975
976 pgmLock(pVM);
977 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
978 {
979 /* Pool page changed while we were waiting for the lock; ignore. */
980 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
981 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
982 pgmUnlock(pVM);
983 return VINF_SUCCESS;
984 }
985# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
986 if (pPage->fDirty)
987 {
988 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
989 pgmUnlock(pVM);
990 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
991 }
992# endif
993
994# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
995 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
996 {
997 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
998 void *pvGst;
999 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1000 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1001 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1002 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1003 }
1004# endif
1005
1006 /*
1007 * Disassemble the faulting instruction.
1008 */
1009 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1010 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1011 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1012 {
1013 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1014 pgmUnlock(pVM);
1015 return rc;
1016 }
1017
1018 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1019
1020 /*
1021 * We should ALWAYS have the list head as user parameter. This
1022 * is because we use that page to record the changes.
1023 */
1024 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1025
1026# ifdef IN_RING0
1027 /* Maximum nr of modifications depends on the page type. */
1028 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1029 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1030 cMaxModifications = 4;
1031 else
1032 cMaxModifications = 24;
1033# else
1034 cMaxModifications = 48;
1035# endif
1036
1037 /*
1038 * Incremental page table updates should weigh more than random ones.
1039 * (Only applies when started from offset 0)
1040 */
1041 pVCpu->pgm.s.cPoolAccessHandler++;
1042 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1043 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1044 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1045 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1046 {
1047 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1048 Assert(pPage->cModifications < 32000);
1049 pPage->cModifications = pPage->cModifications * 2;
1050 pPage->GCPtrLastAccessHandlerFault = pvFault;
1051 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1052 if (pPage->cModifications >= cMaxModifications)
1053 {
1054 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1055 fForcedFlush = true;
1056 }
1057 }
1058
1059 if (pPage->cModifications >= cMaxModifications)
1060 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1061
1062 /*
1063 * Check if it's worth dealing with.
1064 */
1065 bool fReused = false;
1066 bool fNotReusedNotForking = false;
1067 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1068 || pgmPoolIsPageLocked(pPage)
1069 )
1070 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1071 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1072 {
1073 /*
1074 * Simple instructions, no REP prefix.
1075 */
1076 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1077 {
1078 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1079 if (fReused)
1080 goto flushPage;
1081
1082 /* A mov instruction to change the first page table entry will be remembered so we can detect
1083 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1084 */
1085 if ( rc == VINF_SUCCESS
1086 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1087 && pDis->pCurInstr->uOpcode == OP_MOV
1088 && (pvFault & PAGE_OFFSET_MASK) == 0)
1089 {
1090 pPage->GCPtrLastAccessHandlerFault = pvFault;
1091 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1092 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1093 /* Make sure we don't kick out a page too quickly. */
1094 if (pPage->cModifications > 8)
1095 pPage->cModifications = 2;
1096 }
1097 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1098 {
1099 /* ignore the 2nd write to this page table entry. */
1100 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1101 }
1102 else
1103 {
1104 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1105 pPage->GCPtrLastAccessHandlerRip = 0;
1106 }
1107
1108 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1109 pgmUnlock(pVM);
1110 return rc;
1111 }
1112
1113 /*
1114 * Windows is frequently doing small memset() operations (netio test 4k+).
1115 * We have to deal with these or we'll kill the cache and performance.
1116 */
1117 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1118 && !pRegFrame->eflags.Bits.u1DF
1119 && pDis->uOpMode == pDis->uCpuMode
1120 && pDis->uAddrMode == pDis->uCpuMode)
1121 {
1122 bool fValidStosd = false;
1123
1124 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1125 && pDis->fPrefix == DISPREFIX_REP
1126 && pRegFrame->ecx <= 0x20
1127 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1128 && !((uintptr_t)pvFault & 3)
1129 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1130 )
1131 {
1132 fValidStosd = true;
1133 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1134 }
1135 else
1136 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1137 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1138 && pRegFrame->rcx <= 0x20
1139 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1140 && !((uintptr_t)pvFault & 7)
1141 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1142 )
1143 {
1144 fValidStosd = true;
1145 }
1146
1147 if (fValidStosd)
1148 {
1149 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1150 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1151 pgmUnlock(pVM);
1152 return rc;
1153 }
1154 }
1155
1156 /* REP prefix, don't bother. */
1157 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1158 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1159 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1160 fNotReusedNotForking = true;
1161 }
1162
1163# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1164 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1165 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1166 */
1167 if ( pPage->cModifications >= cMaxModifications
1168 && !fForcedFlush
1169 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1170 && ( fNotReusedNotForking
1171 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1172 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1173 )
1174 )
1175 {
1176 Assert(!pgmPoolIsPageLocked(pPage));
1177 Assert(pPage->fDirty == false);
1178
1179 /* Flush any monitored duplicates as we will disable write protection. */
1180 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1181 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1182 {
1183 PPGMPOOLPAGE pPageHead = pPage;
1184
1185 /* Find the monitor head. */
1186 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1187 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1188
1189 while (pPageHead)
1190 {
1191 unsigned idxNext = pPageHead->iMonitoredNext;
1192
1193 if (pPageHead != pPage)
1194 {
1195 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1196 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1197 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1198 AssertRC(rc2);
1199 }
1200
1201 if (idxNext == NIL_PGMPOOL_IDX)
1202 break;
1203
1204 pPageHead = &pPool->aPages[idxNext];
1205 }
1206 }
1207
1208 /* The flushing above might fail for locked pages, so double check. */
1209 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1210 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1211 {
1212 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1213
1214 /* Temporarily allow write access to the page table again. */
1215 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1216 if (rc == VINF_SUCCESS)
1217 {
1218 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1219 AssertMsg(rc == VINF_SUCCESS
1220 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1221 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1222 || rc == VERR_PAGE_NOT_PRESENT,
1223 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1224# ifdef VBOX_STRICT
1225 pPage->GCPtrDirtyFault = pvFault;
1226# endif
1227
1228 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1229 pgmUnlock(pVM);
1230 return rc;
1231 }
1232 }
1233 }
1234# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1235
1236 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1237flushPage:
1238 /*
1239 * Not worth it, so flush it.
1240 *
1241 * If we considered it to be reused, don't go back to ring-3
1242 * to emulate failed instructions since we usually cannot
1243 * interpret them. This may be a bit risky, in which case
1244 * the reuse detection must be fixed.
1245 */
1246 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1247 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1248 && fReused)
1249 {
1250 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1251 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1252 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1253 }
1254 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1255 pgmUnlock(pVM);
1256 return rc;
1257}
1258
1259#endif /* !IN_RING3 */
1260
1261/**
1262 * @callback_method_impl{FNPGMPHYSHANDLER,
1263 * Access handler for shadowed page table pages.}
1264 *
1265 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1266 */
1267PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1268pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1269 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1270{
1271 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1272 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1273 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1274 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1275 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1276
1277 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1278
1279 pgmLock(pVM);
1280
1281#ifdef VBOX_WITH_STATISTICS
1282 /*
1283 * Collect stats on the access.
1284 */
1285 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1286 if (cbBuf <= 16 && cbBuf > 0)
1287 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1288 else if (cbBuf >= 17 && cbBuf < 32)
1289 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1290 else if (cbBuf >= 32 && cbBuf < 64)
1291 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1292 else if (cbBuf >= 64)
1293 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1294
1295 uint8_t cbAlign;
1296 switch (pPage->enmKind)
1297 {
1298 default:
1299 cbAlign = 7;
1300 break;
1301 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1302 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1303 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1304 case PGMPOOLKIND_32BIT_PD:
1305 case PGMPOOLKIND_32BIT_PD_PHYS:
1306 cbAlign = 3;
1307 break;
1308 }
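    /* cbAlign is really an alignment mask: 3 for the legacy kinds with 4-byte entries,
       7 for everything with 8-byte entries, so a non-zero (GCPhys & cbAlign) flags a
       write that does not start on an entry boundary. */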
1309 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1310 if ((uint8_t)GCPhys & cbAlign)
1311 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1312#endif
1313
1314 /*
1315 * Make sure the pool page wasn't modified by a different CPU.
1316 */
1317 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1318 {
1319 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1320
1321 /* The max modification count before flushing depends on the context and page type. */
1322#ifdef IN_RING3
1323 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1324#else
1325 uint16_t cMaxModifications;
1326 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1327 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1328 cMaxModifications = 4;
1329 else
1330 cMaxModifications = 24;
1331#endif
1332
1333 /*
1334 * We don't have to be very sophisticated about this since there are relatively few calls here.
1335 * However, we must try our best to detect any non-CPU accesses (disk / networking).
1336 */
1337 if ( ( pPage->cModifications < cMaxModifications
1338 || pgmPoolIsPageLocked(pPage) )
1339 && enmOrigin != PGMACCESSORIGIN_DEVICE
1340 && cbBuf <= 16)
1341 {
1342 /* Clear the shadow entry. */
1343 if (!pPage->cModifications++)
1344 pgmPoolMonitorModifiedInsert(pPool, pPage);
1345
1346 if (cbBuf <= 8)
1347 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1348 else
1349 {
1350 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1351 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1352 }
1353 }
1354 else
1355 pgmPoolMonitorChainFlush(pPool, pPage);
1356
1357 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1358 }
1359 else
1360 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1361 pgmUnlock(pVM);
1362 return VINF_PGM_HANDLER_DO_DEFAULT;
1363}
1364
1365
1366#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1367
1368# if defined(VBOX_STRICT) && !defined(IN_RING3)
1369
1370/**
1371 * Check references to guest physical memory in a PAE / PAE page table.
1372 *
1373 * @param pPool The pool.
1374 * @param pPage The page.
1375 * @param pShwPT The shadow page table (mapping of the page).
1376 * @param pGstPT The guest page table.
1377 */
1378static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1379{
1380 unsigned cErrors = 0;
1381 int LastRc = -1; /* initialized to shut up gcc */
1382 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1383 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1384 PVMCC pVM = pPool->CTX_SUFF(pVM);
1385
1386# ifdef VBOX_STRICT
1387 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1388 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1389# endif
1390 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1391 {
1392 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1393 {
1394 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1395 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1396 if ( rc != VINF_SUCCESS
1397 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1398 {
1399 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1400 LastPTE = i;
1401 LastRc = rc;
1402 LastHCPhys = HCPhys;
1403 cErrors++;
1404
1405 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1406 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1407 AssertRC(rc);
1408
1409 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1410 {
1411 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1412
1413 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1414 {
1415 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1416
1417 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1418 {
1419 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1420 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1421 {
1422 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1423 }
1424 }
1425
1426 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1427 }
1428 }
1429 }
1430 }
1431 }
1432 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1433}
1434
1435
1436/**
1437 * Check references to guest physical memory in a PAE / 32-bit page table.
1438 *
1439 * @param pPool The pool.
1440 * @param pPage The page.
1441 * @param pShwPT The shadow page table (mapping of the page).
1442 * @param pGstPT The guest page table.
1443 */
1444static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1445{
1446 unsigned cErrors = 0;
1447 int LastRc = -1; /* initialized to shut up gcc */
1448 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1449 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1450 PVMCC pVM = pPool->CTX_SUFF(pVM);
1451
1452# ifdef VBOX_STRICT
1453 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1454 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1455# endif
1456 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1457 {
1458 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1459 {
1460 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1461 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1462 if ( rc != VINF_SUCCESS
1463 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1464 {
1465 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1466 LastPTE = i;
1467 LastRc = rc;
1468 LastHCPhys = HCPhys;
1469 cErrors++;
1470
1471 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1472 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1473 AssertRC(rc);
1474
1475 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1476 {
1477 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1478
1479 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1480 {
1481 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1482
1483 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1484 {
1485 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1486 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1487 {
1488 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1489 }
1490 }
1491
1492 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1493 }
1494 }
1495 }
1496 }
1497 }
1498 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1499}
1500
1501# endif /* VBOX_STRICT && !IN_RING3 */
1502
1503/**
1504 * Clear references to guest physical memory in a PAE / PAE page table.
1505 *
1506 * @returns The number of changed PTEs.
1507 * @param pPool The pool.
1508 * @param pPage The page.
1509 * @param pShwPT The shadow page table (mapping of the page).
1510 * @param pGstPT The guest page table.
1511 * @param pOldGstPT The old cached guest page table.
1512 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1513 * @param pfFlush Flush reused page table (out)
1514 */
1515DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1516 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1517{
1518 unsigned cChanged = 0;
1519
1520# ifdef VBOX_STRICT
1521 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1522 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1523# endif
1524 *pfFlush = false;
1525
1526 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1527 {
1528 /* Check the new value written by the guest. If present and with a bogus physical address, then
1529 * it's fairly safe to assume the guest is reusing the PT.
1530 */
1531 if ( fAllowRemoval
1532 && (pGstPT->a[i].u & X86_PTE_P))
1533 {
1534 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1535 {
1536 *pfFlush = true;
1537 return ++cChanged;
1538 }
1539 }
1540 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1541 {
1542 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1543 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1544 {
1545# ifdef VBOX_STRICT
1546 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1547 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1548 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1549# endif
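 /* The mapping is unchanged. The shadow PTE may legitimately be more restrictive (read-only)
    than the guest PTE (write monitoring / dirty tracking), so only count a change when the
    shadow grants write access that the guest does not. */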
1550 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1551 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1552 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1553 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1554
1555 if ( uHostAttr == uGuestAttr
1556 && fHostRW <= fGuestRW)
1557 continue;
1558 }
1559 cChanged++;
1560 /* Something was changed, so flush it. */
1561 Log4(("pgmPoolTrackFlushPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1562 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1563 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1564 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1565 }
1566 }
1567 return cChanged;
1568}
1569
1570
1571/**
1572 * Clear references to guest physical memory in a PAE / 32-bit page table.
1573 *
1574 * @returns nr of changed PTEs
1575 * @param pPool The pool.
1576 * @param pPage The page.
1577 * @param pShwPT The shadow page table (mapping of the page).
1578 * @param pGstPT The guest page table.
1579 * @param pOldGstPT The old cached guest page table.
1580 * @param fAllowRemoval Whether to bail out as soon as we encounter an invalid PTE.
1581 * @param pfFlush Where to return whether the reused page table must be flushed (out).
1582 */
1583DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1584 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1585{
1586 unsigned cChanged = 0;
1587
1588# ifdef VBOX_STRICT
1589 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1590 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1591# endif
1592 *pfFlush = false;
1593
1594 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1595 {
1596 /* Check the new value written by the guest. If present and with a bogus physical address, then
1597 * it's fairly safe to assume the guest is reusing the PT.
1598 */
1599 if ( fAllowRemoval
1600 && pGstPT->a[i].n.u1Present)
1601 {
1602 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1603 {
1604 *pfFlush = true;
1605 return ++cChanged;
1606 }
1607 }
1608 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1609 {
1610 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1611 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1612 {
1613# ifdef VBOX_STRICT
1614 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1615 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1616 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1617# endif
1618 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1619 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1620 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1621 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1622
1623 if ( uHostAttr == uGuestAttr
1624 && fHostRW <= fGuestRW)
1625 continue;
1626 }
1627 cChanged++;
1628 /* Something was changed, so flush it. */
1629 Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1630 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1631 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1632 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1633 }
1634 }
1635 return cChanged;
1636}
1637
1638
1639/**
1640 * Flushes a dirty page.
1641 *
1642 * @param pVM The cross context VM structure.
1643 * @param pPool The pool.
1644 * @param idxSlot Dirty array slot index.
1645 * @param fAllowRemoval Allow a reused page table to be removed.
1646 */
1647static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1648{
1649 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1650
1651 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1652 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1653 if (idxPage == NIL_PGMPOOL_IDX)
1654 return;
1655
1656 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1657 Assert(pPage->idx == idxPage);
1658 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1659
1660 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1661 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1662
1663# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1664 PVMCPU pVCpu = VMMGetCpu(pVM);
1665 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1666# endif
1667
1668 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1669 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1670 Assert(rc == VINF_SUCCESS);
1671 pPage->fDirty = false;
1672
1673# ifdef VBOX_STRICT
1674 uint64_t fFlags = 0;
1675 RTHCPHYS HCPhys;
1676 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1677 AssertMsg( ( rc == VINF_SUCCESS
1678 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1679 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1680 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1681 || rc == VERR_PAGE_NOT_PRESENT,
1682 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1683# endif
1684
1685 /* Flush those PTEs that have changed. */
1686 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1687 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1688 void *pvGst;
1689 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1690 bool fFlush;
1691 unsigned cChanges;
1692
1693 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1694 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1695 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1696 else
1697 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1698 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1699
1700 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1701 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1702 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1703 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1704
1705 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1706 Assert(pPage->cModifications);
1707 if (cChanges < 4)
1708 pPage->cModifications = 1; /* must use > 0 here */
1709 else
1710 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1711
1712 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1713 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1714 pPool->idxFreeDirtyPage = idxSlot;
1715
1716 pPool->cDirtyPages--;
1717 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1718 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1719 if (fFlush)
1720 {
1721 Assert(fAllowRemoval);
1722 Log(("Flush reused page table!\n"));
1723 pgmPoolFlushPage(pPool, pPage);
1724 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1725 }
1726 else
1727 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1728
1729# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1730 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1731# endif
1732}
1733
1734
1735# ifndef IN_RING3
1736/**
1737 * Adds a new dirty page.
1738 *
1739 * @param pVM The cross context VM structure.
1740 * @param pPool The pool.
1741 * @param pPage The page.
1742 */
1743void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1744{
1745 PGM_LOCK_ASSERT_OWNER(pVM);
1746 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1747 Assert(!pPage->fDirty);
1748
1749 unsigned idxFree = pPool->idxFreeDirtyPage;
1750 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1751 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1752
1753 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1754 {
1755 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1756 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1757 }
1758 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1759 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1760
1761 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1762
1763 /*
1764 * Make a copy of the guest page table as we require valid GCPhys addresses
1765 * when removing references to physical pages.
1766 * (The HCPhys linear lookup is *extremely* expensive!)
1767 */
1768 void *pvGst;
1769 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
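 /* Note: a PAE shadow of a 32-bit guest page table only covers half of the guest page
    (512 x 4-byte entries), hence only PAGE_SIZE/2 bytes are copied in that case. */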
1770 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1771# ifdef VBOX_STRICT
1772 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1773 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1774 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1775 else
1776 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1777 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1778# endif
1779 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1780
1781 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1782 pPage->fDirty = true;
1783 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1784 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1785 pPool->cDirtyPages++;
1786
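 /* Advance the free slot hint (round-robin); if that slot is still occupied and the array
    is not full, search for any free slot. */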
1787 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1788 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1789 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1790 {
1791 unsigned i;
1792 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1793 {
1794 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1795 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1796 {
1797 pPool->idxFreeDirtyPage = idxFree;
1798 break;
1799 }
1800 }
1801 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1802 }
1803
1804 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1805
1806 /*
1807 * Clear all references to this shadow table. See @bugref{7298}.
1808 */
1809 pgmPoolTrackClearPageUsers(pPool, pPage);
1810}
1811# endif /* !IN_RING3 */
1812
1813
1814/**
1815 * Checks if the specified page is dirty (not write monitored).
1816 *
1817 * @returns true if dirty, false if not.
1818 * @param pVM The cross context VM structure.
1819 * @param GCPhys Guest physical address.
1820 */
1821bool pgmPoolIsDirtyPageSlow(PVM pVM, RTGCPHYS GCPhys)
1822{
1823 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1824 PGM_LOCK_ASSERT_OWNER(pVM);
1825 if (!pPool->cDirtyPages)
1826 return false;
1827
1828 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1829
1830 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1831 {
1832 unsigned idxPage = pPool->aidxDirtyPages[i];
1833 if (idxPage != NIL_PGMPOOL_IDX)
1834 {
1835 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1836 if (pPage->GCPhys == GCPhys)
1837 return true;
1838 }
1839 }
1840 return false;
1841}
1842
1843
1844/**
1845 * Reset all dirty pages by reinstating page monitoring.
1846 *
1847 * @param pVM The cross context VM structure.
1848 */
1849void pgmPoolResetDirtyPages(PVMCC pVM)
1850{
1851 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1852 PGM_LOCK_ASSERT_OWNER(pVM);
1853 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1854
1855 if (!pPool->cDirtyPages)
1856 return;
1857
1858 Log(("pgmPoolResetDirtyPages\n"));
1859 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1860 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1861
1862 pPool->idxFreeDirtyPage = 0;
1863 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1864 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1865 {
1866 unsigned i;
1867 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1868 {
1869 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1870 {
1871 pPool->idxFreeDirtyPage = i;
1872 break;
1873 }
1874 }
1875 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1876 }
1877
1878 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1879 return;
1880}
1881
1882
1883/**
1884 * Invalidates the PT entry for the specified page.
1885 *
1886 * @param pVM The cross context VM structure.
1887 * @param GCPtrPage Guest page to invalidate.
1888 */
1889void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1890{
1891 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1892 PGM_LOCK_ASSERT_OWNER(pVM);
1893 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1894
1895 if (!pPool->cDirtyPages)
1896 return;
1897
1898 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
1899 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1900 {
1901 /** @todo What was intended here??? This looks incomplete... */
1902 }
1903}
1904
1905
1906/**
1907 * Flushes the dirty page corresponding to the given page table, reinstating its write monitoring.
1908 *
1909 * @param pVM The cross context VM structure.
1910 * @param GCPhysPT Physical address of the page table
1911 */
1912void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
1913{
1914 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1915 PGM_LOCK_ASSERT_OWNER(pVM);
1916 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1917 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1918
1919 if (!pPool->cDirtyPages)
1920 return;
1921
1922 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1923
1924 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1925 {
1926 unsigned idxPage = pPool->aidxDirtyPages[i];
1927 if (idxPage != NIL_PGMPOOL_IDX)
1928 {
1929 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1930 if (pPage->GCPhys == GCPhysPT)
1931 {
1932 idxDirtyPage = i;
1933 break;
1934 }
1935 }
1936 }
1937
1938 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1939 {
1940 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1941 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1942 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1943 {
1944 unsigned i;
1945 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1946 {
1947 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1948 {
1949 pPool->idxFreeDirtyPage = i;
1950 break;
1951 }
1952 }
1953 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1954 }
1955 }
1956}
1957
1958#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1959
1960/**
1961 * Inserts a page into the GCPhys hash table.
1962 *
1963 * @param pPool The pool.
1964 * @param pPage The page.
1965 */
1966DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1967{
1968 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1969 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1970 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
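 /* Prepend the page to the hash bucket's singly linked chain (linked via iNext). */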
1971 pPage->iNext = pPool->aiHash[iHash];
1972 pPool->aiHash[iHash] = pPage->idx;
1973}
1974
1975
1976/**
1977 * Removes a page from the GCPhys hash table.
1978 *
1979 * @param pPool The pool.
1980 * @param pPage The page.
1981 */
1982DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1983{
1984 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1985 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1986 if (pPool->aiHash[iHash] == pPage->idx)
1987 pPool->aiHash[iHash] = pPage->iNext;
1988 else
1989 {
1990 uint16_t iPrev = pPool->aiHash[iHash];
1991 for (;;)
1992 {
1993 const int16_t i = pPool->aPages[iPrev].iNext;
1994 if (i == pPage->idx)
1995 {
1996 pPool->aPages[iPrev].iNext = pPage->iNext;
1997 break;
1998 }
1999 if (i == NIL_PGMPOOL_IDX)
2000 {
2001 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2002 break;
2003 }
2004 iPrev = i;
2005 }
2006 }
2007 pPage->iNext = NIL_PGMPOOL_IDX;
2008}
2009
2010
2011/**
2012 * Frees up one cache page.
2013 *
2014 * @returns VBox status code.
2015 * @retval VINF_SUCCESS on success.
2016 * @param pPool The pool.
2017 * @param iUser The user index.
2018 */
2019static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2020{
2021 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2022 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
2023 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2024
2025 /*
2026 * Select one page from the tail of the age list.
2027 */
2028 PPGMPOOLPAGE pPage;
2029 for (unsigned iLoop = 0; ; iLoop++)
2030 {
2031 uint16_t iToFree = pPool->iAgeTail;
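 /* Never evict the caller's user page (the table the new entry will live in); take the
    previous entry in the age list instead. */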
2032 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2033 iToFree = pPool->aPages[iToFree].iAgePrev;
2034/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2035 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2036 {
2037 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2038 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2039 {
2040 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2041 continue;
2042 iToFree = i;
2043 break;
2044 }
2045 }
2046*/
2047 Assert(iToFree != iUser);
2048 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2049 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2050 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2051 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2052 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2053 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2054
2055 pPage = &pPool->aPages[iToFree];
2056
2057 /*
2058 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2059 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2060 */
2061 if ( !pgmPoolIsPageLocked(pPage)
2062 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2063 break;
2064 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2065 pgmPoolCacheUsed(pPool, pPage);
2066 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2067 }
2068
2069 /*
2070 * Found a usable page, flush it and return.
2071 */
2072 int rc = pgmPoolFlushPage(pPool, pPage);
2073 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2074 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2075 if (rc == VINF_SUCCESS)
2076 PGM_INVL_ALL_VCPU_TLBS(pVM);
2077 return rc;
2078}
2079
2080
2081/**
2082 * Checks if a kind mismatch is really a page being reused
2083 * or if it's just a normal remapping.
2084 *
2085 * @returns true if reused and the cached page (enmKind1) should be flushed
2086 * @returns false if not reused.
2087 * @param enmKind1 The kind of the cached page.
2088 * @param enmKind2 The kind of the requested page.
2089 */
2090static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2091{
2092 switch (enmKind1)
2093 {
2094 /*
2095 * Never reuse them. There is no remapping in non-paging mode.
2096 */
2097 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2098 case PGMPOOLKIND_32BIT_PD_PHYS:
2099 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2100 case PGMPOOLKIND_PAE_PD_PHYS:
2101 case PGMPOOLKIND_PAE_PDPT_PHYS:
2102 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2103 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2104 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2105 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2106 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2107 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2108 return false;
2109
2110 /*
2111 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2112 */
2113 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2114 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2115 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2116 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2117 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2118 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2119 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2120 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2121 case PGMPOOLKIND_32BIT_PD:
2122 case PGMPOOLKIND_PAE_PDPT:
2123 switch (enmKind2)
2124 {
2125 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2126 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2127 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2128 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2129 case PGMPOOLKIND_64BIT_PML4:
2130 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2131 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2132 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2133 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2134 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2135 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2136 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2137 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2138 return true;
2139 default:
2140 return false;
2141 }
2142
2143 /*
2144 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2145 */
2146 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2147 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2148 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2149 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2150 case PGMPOOLKIND_64BIT_PML4:
2151 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2152 switch (enmKind2)
2153 {
2154 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2155 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2156 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2157 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2158 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2159 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2160 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2161 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2162 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2163 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2164 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2165 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2166 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2167 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2168 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2169 return true;
2170 default:
2171 return false;
2172 }
2173
2174 /*
2175 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2176 */
2177 case PGMPOOLKIND_ROOT_NESTED:
2178 return false;
2179
2180 default:
2181 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2182 }
2183}
2184
2185
2186/**
2187 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2188 *
2189 * @returns VBox status code.
2190 * @retval VINF_PGM_CACHED_PAGE on success.
2191 * @retval VERR_FILE_NOT_FOUND if not found.
2192 * @param pPool The pool.
2193 * @param GCPhys The GC physical address of the page we're gonna shadow.
2194 * @param enmKind The kind of mapping.
2195 * @param enmAccess Access type for the mapping (only relevant for big pages)
2196 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2197 * @param iUser The shadow page pool index of the user table. This is
2198 * NIL_PGMPOOL_IDX for root pages.
2199 * @param iUserTable The index into the user table (shadowed). Ignored if
2200 * root page
2201 * @param ppPage Where to store the pointer to the page.
2202 */
2203static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2204 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2205{
2206 /*
2207 * Look up the GCPhys in the hash.
2208 */
2209 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2210 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2211 if (i != NIL_PGMPOOL_IDX)
2212 {
2213 do
2214 {
2215 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2216 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2217 if (pPage->GCPhys == GCPhys)
2218 {
2219 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2220 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2221 && pPage->fA20Enabled == fA20Enabled)
2222 {
2223 /* Put it at the head of the age list to make sure pgmPoolTrackAddUser
2224 * doesn't flush it in case there are no more free user records.
2225 */
2226 pgmPoolCacheUsed(pPool, pPage);
2227
2228 int rc = VINF_SUCCESS;
2229 if (iUser != NIL_PGMPOOL_IDX)
2230 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2231 if (RT_SUCCESS(rc))
2232 {
2233 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2234 *ppPage = pPage;
2235 if (pPage->cModifications)
2236 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2237 STAM_COUNTER_INC(&pPool->StatCacheHits);
2238 return VINF_PGM_CACHED_PAGE;
2239 }
2240 return rc;
2241 }
2242
2243 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2244 {
2245 /*
2246 * The kind is different. In some cases we should now flush the page
2247 * as it has been reused, but in most cases this is normal remapping
2248 * of PDs as PT or big pages using the GCPhys field in a slightly
2249 * different way than the other kinds.
2250 */
2251 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2252 {
2253 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2254 pgmPoolFlushPage(pPool, pPage);
2255 break;
2256 }
2257 }
2258 }
2259
2260 /* next */
2261 i = pPage->iNext;
2262 } while (i != NIL_PGMPOOL_IDX);
2263 }
2264
2265 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2266 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2267 return VERR_FILE_NOT_FOUND;
2268}
2269
2270
2271/**
2272 * Inserts a page into the cache.
2273 *
2274 * @param pPool The pool.
2275 * @param pPage The cached page.
2276 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2277 */
2278static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2279{
2280 /*
2281 * Insert into the GCPhys hash if the page is fit for that.
2282 */
2283 Assert(!pPage->fCached);
2284 if (fCanBeCached)
2285 {
2286 pPage->fCached = true;
2287 pgmPoolHashInsert(pPool, pPage);
2288 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2289 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2290 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2291 }
2292 else
2293 {
2294 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2295 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2296 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2297 }
2298
2299 /*
2300 * Insert at the head of the age list.
2301 */
2302 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2303 pPage->iAgeNext = pPool->iAgeHead;
2304 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2305 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2306 else
2307 pPool->iAgeTail = pPage->idx;
2308 pPool->iAgeHead = pPage->idx;
2309}
2310
2311
2312/**
2313 * Flushes a cached page.
2314 *
2315 * @param pPool The pool.
2316 * @param pPage The cached page.
2317 */
2318static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2319{
2320 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2321
2322 /*
2323 * Remove the page from the hash.
2324 */
2325 if (pPage->fCached)
2326 {
2327 pPage->fCached = false;
2328 pgmPoolHashRemove(pPool, pPage);
2329 }
2330 else
2331 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2332
2333 /*
2334 * Remove it from the age list.
2335 */
2336 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2337 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2338 else
2339 pPool->iAgeTail = pPage->iAgePrev;
2340 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2341 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2342 else
2343 pPool->iAgeHead = pPage->iAgeNext;
2344 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2345 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2346}
2347
2348
2349/**
2350 * Looks for pages sharing the monitor.
2351 *
2352 * @returns Pointer to the head page.
2353 * @returns NULL if not found.
2354 * @param pPool The pool.
2355 * @param pNewPage The page which is going to be monitored.
2356 */
2357static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2358{
2359 /*
2360 * Look up the GCPhys in the hash.
2361 */
2362 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2363 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2364 if (i == NIL_PGMPOOL_IDX)
2365 return NULL;
2366 do
2367 {
2368 PPGMPOOLPAGE pPage = &pPool->aPages[i];
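 /* Unsigned compare: matches any pool page whose GCPhys lies within the same guest page
    as GCPhys; kinds such as PAE PTs shadowing 32-bit PTs keep a sub-page offset in GCPhys. */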
2369 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2370 && pPage != pNewPage)
2371 {
2372 switch (pPage->enmKind)
2373 {
2374 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2375 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2376 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2377 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2378 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2379 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2380 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2381 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2382 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2383 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2384 case PGMPOOLKIND_64BIT_PML4:
2385 case PGMPOOLKIND_32BIT_PD:
2386 case PGMPOOLKIND_PAE_PDPT:
2387 {
2388 /* find the head */
2389 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2390 {
2391 Assert(pPage->iMonitoredPrev != pPage->idx);
2392 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2393 }
2394 return pPage;
2395 }
2396
2397 /* ignore, no monitoring. */
2398 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2399 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2400 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2401 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2402 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2403 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2404 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2405 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2406 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2407 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2408 case PGMPOOLKIND_ROOT_NESTED:
2409 case PGMPOOLKIND_PAE_PD_PHYS:
2410 case PGMPOOLKIND_PAE_PDPT_PHYS:
2411 case PGMPOOLKIND_32BIT_PD_PHYS:
2412 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2413 break;
2414 default:
2415 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2416 }
2417 }
2418
2419 /* next */
2420 i = pPage->iNext;
2421 } while (i != NIL_PGMPOOL_IDX);
2422 return NULL;
2423}
2424
2425
2426/**
2427 * Enables write monitoring of a guest page.
2428 *
2429 * @returns VBox status code.
2430 * @retval VINF_SUCCESS on success.
2431 * @param pPool The pool.
2432 * @param pPage The cached page.
2433 */
2434static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2435{
2436 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2437
2438 /*
2439 * Filter out the relevant kinds.
2440 */
2441 switch (pPage->enmKind)
2442 {
2443 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2444 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2445 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2446 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2447 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2448 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2449 case PGMPOOLKIND_64BIT_PML4:
2450 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2451 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2452 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2453 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2454 case PGMPOOLKIND_32BIT_PD:
2455 case PGMPOOLKIND_PAE_PDPT:
2456 break;
2457
2458 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2459 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2460 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2461 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2462 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2463 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2464 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2465 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2466 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2467 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2468 case PGMPOOLKIND_ROOT_NESTED:
2469 /* Nothing to monitor here. */
2470 return VINF_SUCCESS;
2471
2472 case PGMPOOLKIND_32BIT_PD_PHYS:
2473 case PGMPOOLKIND_PAE_PDPT_PHYS:
2474 case PGMPOOLKIND_PAE_PD_PHYS:
2475 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2476 /* Nothing to monitor here. */
2477 return VINF_SUCCESS;
2478 default:
2479 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2480 }
2481
2482 /*
2483 * Install handler.
2484 */
2485 int rc;
2486 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2487 if (pPageHead)
2488 {
2489 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2490 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2491
2492#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2493 if (pPageHead->fDirty)
2494 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2495#endif
2496
2497 pPage->iMonitoredPrev = pPageHead->idx;
2498 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2499 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2500 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2501 pPageHead->iMonitoredNext = pPage->idx;
2502 rc = VINF_SUCCESS;
2503 }
2504 else
2505 {
2506 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2507 PVMCC pVM = pPool->CTX_SUFF(pVM);
2508 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2509 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2510 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2511 NIL_RTR3PTR /*pszDesc*/);
2512 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2513 * the heap size should suffice. */
2514 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2515 PVMCPU pVCpu = VMMGetCpu(pVM);
2516 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2517 }
2518 pPage->fMonitored = true;
2519 return rc;
2520}
2521
2522
2523/**
2524 * Disables write monitoring of a guest page.
2525 *
2526 * @returns VBox status code.
2527 * @retval VINF_SUCCESS on success.
2528 * @param pPool The pool.
2529 * @param pPage The cached page.
2530 */
2531static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2532{
2533 /*
2534 * Filter out the relevant kinds.
2535 */
2536 switch (pPage->enmKind)
2537 {
2538 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2539 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2540 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2541 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2542 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2543 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2544 case PGMPOOLKIND_64BIT_PML4:
2545 case PGMPOOLKIND_32BIT_PD:
2546 case PGMPOOLKIND_PAE_PDPT:
2547 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2548 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2549 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2550 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2551 break;
2552
2553 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2554 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2555 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2556 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2557 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2558 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2559 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2560 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2561 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2562 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2563 case PGMPOOLKIND_ROOT_NESTED:
2564 case PGMPOOLKIND_PAE_PD_PHYS:
2565 case PGMPOOLKIND_PAE_PDPT_PHYS:
2566 case PGMPOOLKIND_32BIT_PD_PHYS:
2567 /* Nothing to monitor here. */
2568 Assert(!pPage->fMonitored);
2569 return VINF_SUCCESS;
2570
2571 default:
2572 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2573 }
2574 Assert(pPage->fMonitored);
2575
2576 /*
2577 * Remove the page from the monitored list or uninstall it if last.
2578 */
2579 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2580 int rc;
2581 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2582 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2583 {
2584 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2585 {
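 /* This page is the head of a monitoring chain: promote the next page to head and
    re-point the physical handler's user argument at it. */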
2586 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2587 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2588 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2589 MMHyperCCToR3(pVM, pNewHead), MMHyperCCToR0(pVM, pNewHead));
2590
2591 AssertFatalRCSuccess(rc);
2592 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2593 }
2594 else
2595 {
2596 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2597 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2598 {
2599 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2600 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2601 }
2602 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2603 rc = VINF_SUCCESS;
2604 }
2605 }
2606 else
2607 {
2608 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2609 AssertFatalRC(rc);
2610 PVMCPU pVCpu = VMMGetCpu(pVM);
2611 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2612 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2613 }
2614 pPage->fMonitored = false;
2615
2616 /*
2617 * Remove it from the list of modified pages (if in it).
2618 */
2619 pgmPoolMonitorModifiedRemove(pPool, pPage);
2620
2621 return rc;
2622}
2623
2624
2625/**
2626 * Inserts the page into the list of modified pages.
2627 *
2628 * @param pPool The pool.
2629 * @param pPage The page.
2630 */
2631void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2632{
2633 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2634 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2635 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2636 && pPool->iModifiedHead != pPage->idx,
2637 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2638 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2639 pPool->iModifiedHead, pPool->cModifiedPages));
2640
2641 pPage->iModifiedNext = pPool->iModifiedHead;
2642 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2643 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2644 pPool->iModifiedHead = pPage->idx;
2645 pPool->cModifiedPages++;
2646#ifdef VBOX_WITH_STATISTICS
2647 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2648 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2649#endif
2650}
2651
2652
2653/**
2654 * Removes the page from the list of modified pages and resets the
2655 * modification counter.
2656 *
2657 * @param pPool The pool.
2658 * @param pPage The page which is believed to be in the list of modified pages.
2659 */
2660static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2661{
2662 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2663 if (pPool->iModifiedHead == pPage->idx)
2664 {
2665 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2666 pPool->iModifiedHead = pPage->iModifiedNext;
2667 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2668 {
2669 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2670 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2671 }
2672 pPool->cModifiedPages--;
2673 }
2674 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2675 {
2676 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2677 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2678 {
2679 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2680 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2681 }
2682 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2683 pPool->cModifiedPages--;
2684 }
2685 else
2686 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2687 pPage->cModifications = 0;
2688}
2689
2690
2691/**
2692 * Zaps the list of modified pages, resetting their modification counters in the process.
2693 *
2694 * @param pVM The cross context VM structure.
2695 */
2696static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2697{
2698 pgmLock(pVM);
2699 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2700 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2701
2702 unsigned cPages = 0; NOREF(cPages);
2703
2704#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2705 pgmPoolResetDirtyPages(pVM);
2706#endif
2707
2708 uint16_t idx = pPool->iModifiedHead;
2709 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
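 /* The list head was detached above; walk the old chain and reset each page's links
    and modification counter. */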
2710 while (idx != NIL_PGMPOOL_IDX)
2711 {
2712 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2713 idx = pPage->iModifiedNext;
2714 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2715 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2716 pPage->cModifications = 0;
2717 Assert(++cPages);
2718 }
2719 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2720 pPool->cModifiedPages = 0;
2721 pgmUnlock(pVM);
2722}
2723
2724
2725/**
2726 * Handles SyncCR3 pool tasks.
2727 *
2728 * @returns VBox status code.
2729 * @retval VINF_SUCCESS on success.
2730 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2731 * @param pVCpu The cross context virtual CPU structure.
2732 * @remark Should only be used when monitoring is available, thus placed in
2733 * the PGMPOOL_WITH_MONITORING \#ifdef.
2734 */
2735int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2736{
2737 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2738 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2739
2740 /*
2741 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2742 * Occasionally we will have to clear all the shadow page tables because we wanted
2743 * to monitor a page which was mapped by too many shadowed page tables. This operation
2744 * is sometimes referred to as a 'lightweight flush'.
2745 */
2746# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2747 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2748 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2749# else /* !IN_RING3 */
2750 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2751 {
2752 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2753 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2754
2755 /* Make sure all other VCPUs return to ring 3. */
2756 if (pVM->cCpus > 1)
2757 {
2758 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2759 PGM_INVL_ALL_VCPU_TLBS(pVM);
2760 }
2761 return VINF_PGM_SYNC_CR3;
2762 }
2763# endif /* !IN_RING3 */
2764 else
2765 {
2766 pgmPoolMonitorModifiedClearAll(pVM);
2767
2768 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2769 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2770 {
2771 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2772 return pgmPoolSyncCR3(pVCpu);
2773 }
2774 }
2775 return VINF_SUCCESS;
2776}
2777
2778
2779/**
2780 * Frees up at least one user entry.
2781 *
2782 * @returns VBox status code.
2783 * @retval VINF_SUCCESS if successfully freed.
2784 *
2785 * @param pPool The pool.
2786 * @param iUser The user index.
2787 */
2788static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2789{
2790 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2791 /*
2792 * Just free cached pages in a braindead fashion.
2793 */
2794 /** @todo walk the age list backwards and free the first with usage. */
2795 int rc = VINF_SUCCESS;
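 /* Flushing a cached page returns its user records to the free list; repeat until
    one becomes available. */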
2796 do
2797 {
2798 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2799 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2800 rc = rc2;
2801 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2802 return rc;
2803}
2804
2805
2806/**
2807 * Inserts a page into the cache.
2808 *
2809 * This will create a user node for the page, insert it into the GCPhys
2810 * hash, and insert it into the age list.
2811 *
2812 * @returns VBox status code.
2813 * @retval VINF_SUCCESS if successfully added.
2814 *
2815 * @param pPool The pool.
2816 * @param pPage The cached page.
2817 * @param GCPhys The GC physical address of the page we're gonna shadow.
2818 * @param iUser The user index.
2819 * @param iUserTable The user table index.
2820 */
2821DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2822{
2823 int rc = VINF_SUCCESS;
2824 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2825
2826 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2827
2828 if (iUser != NIL_PGMPOOL_IDX)
2829 {
2830#ifdef VBOX_STRICT
2831 /*
2832 * Check that the entry doesn't already exist.
2833 */
2834 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2835 {
2836 uint16_t i = pPage->iUserHead;
2837 do
2838 {
2839 Assert(i < pPool->cMaxUsers);
2840 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2841 i = paUsers[i].iNext;
2842 } while (i != NIL_PGMPOOL_USER_INDEX);
2843 }
2844#endif
2845
2846 /*
2847 * Find a free user node.
2848 */
2849 uint16_t i = pPool->iUserFreeHead;
2850 if (i == NIL_PGMPOOL_USER_INDEX)
2851 {
2852 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2853 if (RT_FAILURE(rc))
2854 return rc;
2855 i = pPool->iUserFreeHead;
2856 }
2857
2858 /*
2859 * Unlink the user node from the free list,
2860 * initialize and insert it into the user list.
2861 */
2862 pPool->iUserFreeHead = paUsers[i].iNext;
2863 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2864 paUsers[i].iUser = iUser;
2865 paUsers[i].iUserTable = iUserTable;
2866 pPage->iUserHead = i;
2867 }
2868 else
2869 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2870
2871
2872 /*
2873 * Insert into cache and enable monitoring of the guest page if enabled.
2874 *
2875 * Until we implement caching of all levels, including the CR3 one, we'll
2876 * have to make sure we don't try to monitor & cache any recursive reuse of
2877 * a monitored CR3 page. Because all Windows versions are doing this we'll
2878 * have to be able to do combined access monitoring, CR3 + PT and
2879 * PD + PT (guest PAE).
2880 *
2881 * Update:
2882 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2883 */
2884 const bool fCanBeMonitored = true;
2885 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2886 if (fCanBeMonitored)
2887 {
2888 rc = pgmPoolMonitorInsert(pPool, pPage);
2889 AssertRC(rc);
2890 }
2891 return rc;
2892}
2893
2894
2895/**
2896 * Adds a user reference to a page.
2897 *
2898 * This will move the page to the head of the age list.
2899 *
2900 * @returns VBox status code.
2901 * @retval VINF_SUCCESS if successfully added.
2902 *
2903 * @param pPool The pool.
2904 * @param pPage The cached page.
2905 * @param iUser The user index.
2906 * @param iUserTable The user table.
2907 */
2908static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2909{
2910 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2911 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2912 Assert(iUser != NIL_PGMPOOL_IDX);
2913
2914# ifdef VBOX_STRICT
2915 /*
2916 * Check that the entry doesn't already exist. We only allow multiple
2917 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2918 */
2919 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2920 {
2921 uint16_t i = pPage->iUserHead;
2922 do
2923 {
2924 Assert(i < pPool->cMaxUsers);
2925 /** @todo this assertion looks odd... Shouldn't it be && here? */
2926 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2927 i = paUsers[i].iNext;
2928 } while (i != NIL_PGMPOOL_USER_INDEX);
2929 }
2930# endif
2931
2932 /*
2933 * Allocate a user node.
2934 */
2935 uint16_t i = pPool->iUserFreeHead;
2936 if (i == NIL_PGMPOOL_USER_INDEX)
2937 {
2938 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2939 if (RT_FAILURE(rc))
2940 return rc;
2941 i = pPool->iUserFreeHead;
2942 }
2943 pPool->iUserFreeHead = paUsers[i].iNext;
2944
2945 /*
2946 * Initialize the user node and insert it.
2947 */
2948 paUsers[i].iNext = pPage->iUserHead;
2949 paUsers[i].iUser = iUser;
2950 paUsers[i].iUserTable = iUserTable;
2951 pPage->iUserHead = i;
2952
2953# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2954 if (pPage->fDirty)
2955 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2956# endif
2957
2958 /*
2959 * Tell the cache to update its replacement stats for this page.
2960 */
2961 pgmPoolCacheUsed(pPool, pPage);
2962 return VINF_SUCCESS;
2963}
2964
2965
2966/**
2967 * Frees a user record associated with a page.
2968 *
2969 * This does not clear the entry in the user table, it simply returns the
2970 * user record to the chain of free records.
2971 *
2972 * @param pPool The pool.
2973 * @param pPage The shadow page.
2974 * @param iUser The shadow page pool index of the user table.
2975 * @param iUserTable The index into the user table (shadowed).
2976 *
2977 * @remarks Don't call this for root pages.
2978 */
2979static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2980{
2981 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2982 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2983 Assert(iUser != NIL_PGMPOOL_IDX);
2984
2985 /*
2986 * Unlink and free the specified user entry.
2987 */
2988
2989 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2990 uint16_t i = pPage->iUserHead;
2991 if ( i != NIL_PGMPOOL_USER_INDEX
2992 && paUsers[i].iUser == iUser
2993 && paUsers[i].iUserTable == iUserTable)
2994 {
2995 pPage->iUserHead = paUsers[i].iNext;
2996
2997 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2998 paUsers[i].iNext = pPool->iUserFreeHead;
2999 pPool->iUserFreeHead = i;
3000 return;
3001 }
3002
3003 /* General: Linear search. */
3004 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3005 while (i != NIL_PGMPOOL_USER_INDEX)
3006 {
3007 if ( paUsers[i].iUser == iUser
3008 && paUsers[i].iUserTable == iUserTable)
3009 {
3010 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3011 paUsers[iPrev].iNext = paUsers[i].iNext;
3012 else
3013 pPage->iUserHead = paUsers[i].iNext;
3014
3015 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3016 paUsers[i].iNext = pPool->iUserFreeHead;
3017 pPool->iUserFreeHead = i;
3018 return;
3019 }
3020 iPrev = i;
3021 i = paUsers[i].iNext;
3022 }
3023
3024 /* Fatal: didn't find it */
3025 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3026 iUser, iUserTable, pPage->GCPhys));
3027}
3028
3029
3030#if 0 /* unused */
3031/**
3032 * Gets the entry size of a shadow table.
3033 *
3034 * @param enmKind The kind of page.
3035 *
3036 * @returns The size of the entry in bytes. That is, 4 or 8.
3037 * @returns If the kind is not for a table, an assertion is raised and 0 is
3038 * returned.
3039 */
3040DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3041{
3042 switch (enmKind)
3043 {
3044 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3045 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3046 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3047 case PGMPOOLKIND_32BIT_PD:
3048 case PGMPOOLKIND_32BIT_PD_PHYS:
3049 return 4;
3050
3051 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3052 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3053 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3054 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3055 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3056 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3057 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3058 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3059 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3060 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3061 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3062 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3063 case PGMPOOLKIND_64BIT_PML4:
3064 case PGMPOOLKIND_PAE_PDPT:
3065 case PGMPOOLKIND_ROOT_NESTED:
3066 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3067 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3068 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3069 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3070 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3071 case PGMPOOLKIND_PAE_PD_PHYS:
3072 case PGMPOOLKIND_PAE_PDPT_PHYS:
3073 return 8;
3074
3075 default:
3076 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3077 }
3078}
3079#endif /* unused */
3080
3081#if 0 /* unused */
3082/**
3083 * Gets the entry size of a guest table.
3084 *
3085 * @param enmKind The kind of page.
3086 *
3087 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3088 * @returns If the kind is not for a table, an assertion is raised and 0 is
3089 * returned.
3090 */
3091DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3092{
3093 switch (enmKind)
3094 {
3095 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3096 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3097 case PGMPOOLKIND_32BIT_PD:
3098 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3099 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3100 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3101 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3102 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3103 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3104 return 4;
3105
3106 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3107 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3108 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3109 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3110 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3111 case PGMPOOLKIND_64BIT_PML4:
3112 case PGMPOOLKIND_PAE_PDPT:
3113 return 8;
3114
3115 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3116 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3117 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3118 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3119 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3120 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3121 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3122 case PGMPOOLKIND_ROOT_NESTED:
3123 case PGMPOOLKIND_PAE_PD_PHYS:
3124 case PGMPOOLKIND_PAE_PDPT_PHYS:
3125 case PGMPOOLKIND_32BIT_PD_PHYS:
3126 /** @todo can we return 0? (nobody is calling this...) */
3127 AssertFailed();
3128 return 0;
3129
3130 default:
3131 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3132 }
3133}
3134#endif /* unused */
3135
3136
3137/**
3138 * Checks one shadow page table entry for a mapping of a physical page.
3139 *
3140 * @returns true / false indicating removal of all relevant PTEs
3141 *
3142 * @param pVM The cross context VM structure.
3143 * @param pPhysPage The guest page in question.
3144 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3145 * @param iShw The shadow page table.
3146 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3147 */
3148static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3149{
3150 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3151 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3152 bool fRet = false;
3153
3154 /*
3155 * Assert sanity.
3156 */
3157 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3158 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3159 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3160
3161 /*
3162 * Then, clear the actual mappings to the page in the shadow PT.
3163 */
3164 switch (pPage->enmKind)
3165 {
3166 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3167 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3168 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3169 {
3170 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3171 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3172 uint32_t u32AndMask = 0;
3173 uint32_t u32OrMask = 0;
3174
3175 if (!fFlushPTEs)
3176 {
3177 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3178 {
3179 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3180 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3181 u32OrMask = X86_PTE_RW;
3182 u32AndMask = UINT32_MAX;
3183 fRet = true;
3184 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3185 break;
3186
3187 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3188 u32OrMask = 0;
3189 u32AndMask = ~X86_PTE_RW;
3190 fRet = true;
3191 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3192 break;
3193 default:
3194 /* (shouldn't be here, will assert below) */
3195 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3196 break;
3197 }
3198 }
3199 else
3200 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3201
3202 /* Update the counter if we're removing references. */
3203 if (!u32AndMask)
3204 {
3205 Assert(pPage->cPresent);
3206 Assert(pPool->cPresent);
3207 pPage->cPresent--;
3208 pPool->cPresent--;
3209 }
3210
3211 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3212 {
3213 X86PTE Pte;
3214
3215 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3216 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3217 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3218 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3219
3220 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3221 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3222 return fRet;
3223 }
3224#ifdef LOG_ENABLED
3225 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3226 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3227 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3228 {
3229 Log(("i=%d cFound=%d\n", i, ++cFound));
3230 }
3231#endif
3232 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3233 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3234 break;
3235 }
3236
3237 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3238 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3239 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3240 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3241 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3242 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3243 {
3244 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3245 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3246 uint64_t u64OrMask = 0;
3247 uint64_t u64AndMask = 0;
3248
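/* Same keep-or-downgrade decision as in the 32-bit case above, just with 64-bit masks. */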
3249 if (!fFlushPTEs)
3250 {
3251 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3252 {
3253 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3254 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3255 u64OrMask = X86_PTE_RW;
3256 u64AndMask = UINT64_MAX;
3257 fRet = true;
3258 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3259 break;
3260
3261 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3262 u64OrMask = 0;
3263 u64AndMask = ~(uint64_t)X86_PTE_RW;
3264 fRet = true;
3265 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3266 break;
3267
3268 default:
3269 /* (shouldn't be here, will assert below) */
3270 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3271 break;
3272 }
3273 }
3274 else
3275 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3276
3277 /* Update the counter if we're removing references. */
3278 if (!u64AndMask)
3279 {
3280 Assert(pPage->cPresent);
3281 Assert(pPool->cPresent);
3282 pPage->cPresent--;
3283 pPool->cPresent--;
3284 }
3285
3286 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3287 {
3288 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3289 X86PTEPAE Pte;
3290 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3291 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3292 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3293
3294 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3295 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3296 return fRet;
3297 }
3298#ifdef LOG_ENABLED
3299 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3300 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3301 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3302 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3303 Log(("i=%d cFound=%d\n", i, ++cFound));
3304#endif
3305 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3306 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3307 break;
3308 }
3309
3310#ifdef PGM_WITH_LARGE_PAGES
3311 /* Large page case only. */
3312 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3313 {
3314 Assert(pVM->pgm.s.fNestedPaging);
3315
3316 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3317 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3318
3319 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3320 {
3321 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3322 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3323 pPD->a[iPte].u = 0;
3324 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3325
3326 /* Update the counter as we're removing references. */
3327 Assert(pPage->cPresent);
3328 Assert(pPool->cPresent);
3329 pPage->cPresent--;
3330 pPool->cPresent--;
3331
3332 return fRet;
3333 }
3334# ifdef LOG_ENABLED
3335 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3336 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3337 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3338 Log(("i=%d cFound=%d\n", i, ++cFound));
3339# endif
3340 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3341 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3342 break;
3343 }
3344
3345 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3346 case PGMPOOLKIND_PAE_PD_PHYS:
3347 {
3348 Assert(pVM->pgm.s.fNestedPaging);
3349
3350 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3351 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3352
3353 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3354 {
3355 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3356 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3357 pPD->a[iPte].u = 0;
3358 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3359
3360 /* Update the counter as we're removing references. */
3361 Assert(pPage->cPresent);
3362 Assert(pPool->cPresent);
3363 pPage->cPresent--;
3364 pPool->cPresent--;
3365 return fRet;
3366 }
3367# ifdef LOG_ENABLED
3368 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3369 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3370 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3371 Log(("i=%d cFound=%d\n", i, ++cFound));
3372# endif
3373 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3374 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3375 break;
3376 }
3377#endif /* PGM_WITH_LARGE_PAGES */
3378
3379 default:
3380 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3381 }
3382
3383 /* not reached. */
3384#ifndef _MSC_VER
3385 return fRet;
3386#endif
3387}
3388
3389
3390/**
3391 * Scans one shadow page table for mappings of a physical page.
3392 *
3393 * @param pVM The cross context VM structure.
3394 * @param pPhysPage The guest page in question.
3395 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3396 * @param iShw The shadow page table.
3397 */
3398static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3399{
3400 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3401
3402 /* We should only come here when there's only one reference to this physical page. */
3403 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3404
3405 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3406 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3407 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3408 if (!fKeptPTEs)
3409 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3410 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3411}
3412
3413
3414/**
3415 * Flushes a list of shadow page tables mapping the same physical page.
3416 *
3417 * @param pVM The cross context VM structure.
3418 * @param pPhysPage The guest page in question.
3419 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3420 * @param iPhysExt The physical cross reference extent list to flush.
3421 */
3422static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3423{
3424 PGM_LOCK_ASSERT_OWNER(pVM);
3425 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3426 bool fKeepList = false;
3427
3428 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3429 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3430
3431 const uint16_t iPhysExtStart = iPhysExt;
3432 PPGMPOOLPHYSEXT pPhysExt;
3433 do
3434 {
3435 Assert(iPhysExt < pPool->cMaxPhysExts);
3436 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3437 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3438 {
3439 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3440 {
3441 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3442 if (!fKeptPTEs)
3443 {
3444 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3445 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3446 }
3447 else
3448 fKeepList = true;
3449 }
3450 }
3451 /* next */
3452 iPhysExt = pPhysExt->iNext;
3453 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3454
3455 if (!fKeepList)
3456 {
3457 /* insert the list into the free list and clear the ram range entry. */
3458 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3459 pPool->iPhysExtFreeHead = iPhysExtStart;
3460 /* Invalidate the tracking data. */
3461 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3462 }
3463
3464 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3465}
3466
3467
3468/**
3469 * Flushes all shadow page table mappings of the given guest page.
3470 *
3471 * This is typically called when the host page backing the guest one has been
3472 * replaced or when the page protection was changed due to a guest access
3473 * caught by the monitoring.
3474 *
3475 * @returns VBox status code.
3476 * @retval VINF_SUCCESS if all references have been successfully cleared.
3477 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3478 * pool cleaning. FF and sync flags are set.
3479 *
3480 * @param pVM The cross context VM structure.
3481 * @param GCPhysPage GC physical address of the page in question
3482 * @param pPhysPage The guest page in question.
3483 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3484 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3485 * flushed; it is NOT touched if this isn't necessary.
3486 * The caller MUST initialize this to @a false.
3487 */
3488int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3489{
3490 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3491 pgmLock(pVM);
3492 int rc = VINF_SUCCESS;
3493
3494#ifdef PGM_WITH_LARGE_PAGES
3495 /* Is this page part of a large page? */
3496 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3497 {
3498 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3499 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3500
3501 /* Fetch the large page base. */
3502 PPGMPAGE pLargePage;
3503 if (GCPhysBase != GCPhysPage)
3504 {
3505 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3506 AssertFatal(pLargePage);
3507 }
3508 else
3509 pLargePage = pPhysPage;
3510
3511 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3512
3513 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3514 {
3515 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3516 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3517 pVM->pgm.s.cLargePagesDisabled++;
3518
3519 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3520 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3521
3522 *pfFlushTLBs = true;
3523 pgmUnlock(pVM);
3524 return rc;
3525 }
3526 }
3527#else
3528 NOREF(GCPhysPage);
3529#endif /* PGM_WITH_LARGE_PAGES */
3530
3531 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3532 if (u16)
3533 {
3534 /*
3535 * The zero page is currently screwing up the tracking and we'll
3536 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3537 * is defined, zero pages won't normally be mapped. Some kind of solution
3538 * will be needed for this problem of course, but it will have to wait...
3539 */
3540 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3541 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3542 rc = VINF_PGM_GCPHYS_ALIASED;
3543 else
3544 {
3545# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 /** @todo we can drop this now. */
3546 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3547 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3548 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3549# endif
3550
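/* The tracking word encodes how many shadow PTEs reference the page: a single reference points directly at the shadow page table, multiple references go via a physical extent list, and the overflow marker forces the slow full scan. */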
3551 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3552 {
3553 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3554 pgmPoolTrackFlushGCPhysPT(pVM,
3555 pPhysPage,
3556 fFlushPTEs,
3557 PGMPOOL_TD_GET_IDX(u16));
3558 }
3559 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3560 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3561 else
3562 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3563 *pfFlushTLBs = true;
3564
3565# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3566 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3567# endif
3568 }
3569 }
3570
3571 if (rc == VINF_PGM_GCPHYS_ALIASED)
3572 {
3573 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3574 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3575 rc = VINF_PGM_SYNC_CR3;
3576 }
3577 pgmUnlock(pVM);
3578 return rc;
3579}
3580
3581
3582/**
3583 * Scans all shadow page tables for mappings of a physical page.
3584 *
3585 * This may be slow, but it's most likely more efficient than cleaning
3586 * out the entire page pool / cache.
3587 *
3588 * @returns VBox status code.
3589 * @retval VINF_SUCCESS if all references have been successfully cleared.
3590 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3591 * a page pool cleaning.
3592 *
3593 * @param pVM The cross context VM structure.
3594 * @param pPhysPage The guest page in question.
3595 */
3596int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3597{
3598 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3599 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3600 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3601 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3602
3603 /*
3604 * There is a limit to what makes sense.
3605 */
3606 if ( pPool->cPresent > 1024
3607 && pVM->cCpus == 1)
3608 {
3609 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3610 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3611 return VINF_PGM_GCPHYS_ALIASED;
3612 }
3613
3614 /*
3615 * Iterate all the pages until we've encountered all that are in use.
3616 * This is a simple but not quite optimal solution.
3617 */
3618 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3619 unsigned cLeft = pPool->cUsedPages;
3620 unsigned iPage = pPool->cCurPages;
3621 while (--iPage >= PGMPOOL_IDX_FIRST)
3622 {
3623 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3624 if ( pPage->GCPhys != NIL_RTGCPHYS
3625 && pPage->cPresent)
3626 {
3627 switch (pPage->enmKind)
3628 {
3629 /*
3630 * We only care about shadow page tables.
3631 */
3632 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3633 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3634 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3635 {
3636 const uint32_t u32 = (uint32_t)u64;
3637 unsigned cPresent = pPage->cPresent;
3638 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3639 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3640 if (pPT->a[i].n.u1Present)
3641 {
3642 if ((pPT->a[i].u & X86_PTE_PG_MASK) == u32)
3643 {
3644 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3645 pPT->a[i].u = 0;
3646
3647 /* Update the counter as we're removing references. */
3648 Assert(pPage->cPresent);
3649 Assert(pPool->cPresent);
3650 pPage->cPresent--;
3651 pPool->cPresent--;
3652 }
3653 if (!--cPresent)
3654 break;
3655 }
3656 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3657 break;
3658 }
3659
3660 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3661 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3662 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3663 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3664 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3665 {
3666 unsigned cPresent = pPage->cPresent;
3667 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3668 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3669 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3670 {
3671 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3672 {
3673 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3674 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3675
3676 /* Update the counter as we're removing references. */
3677 Assert(pPage->cPresent);
3678 Assert(pPool->cPresent);
3679 pPage->cPresent--;
3680 pPool->cPresent--;
3681 }
3682 if (!--cPresent)
3683 break;
3684 }
3685 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3686 break;
3687 }
3688
3689 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3690 {
3691 unsigned cPresent = pPage->cPresent;
3692 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3693 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3694 {
3695 X86PGPAEUINT const uPte = pPT->a[i].u;
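/* EPT entries have no dedicated present bit; read access is used as the presence indicator here. */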
3696 if (uPte & EPT_E_READ)
3697 {
3698 if ((uPte & EPT_PTE_PG_MASK) == u64)
3699 {
3700 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3701 pPT->a[i].u = 0;
3702
3703 /* Update the counter as we're removing references. */
3704 Assert(pPage->cPresent);
3705 Assert(pPool->cPresent);
3706 pPage->cPresent--;
3707 pPool->cPresent--;
3708 }
3709 if (!--cPresent)
3710 break;
3711 }
3712 }
3713 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3714 break;
3715 }
3716 }
3717
3718 if (!--cLeft)
3719 break;
3720 }
3721 }
3722
3723 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3724 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3725
3726 /*
3727 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3728 */
3729 if (pPool->cPresent > 1024)
3730 {
3731 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3732 return VINF_PGM_GCPHYS_ALIASED;
3733 }
3734
3735 return VINF_SUCCESS;
3736}
3737
3738
3739/**
3740 * Clears the user entry in a user table.
3741 *
3742 * This is used to remove all references to a page when flushing it.
3743 */
3744static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3745{
3746 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3747 Assert(pUser->iUser < pPool->cCurPages);
3748 uint32_t iUserTable = pUser->iUserTable;
3749
3750 /*
3751 * Map the user page. Ignore references made by fictitious pages.
3752 */
3753 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3754 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3755 union
3756 {
3757 uint64_t *pau64;
3758 uint32_t *pau32;
3759 } u;
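/* The user (parent) table holds either 32-bit or 64-bit entries; which one applies is decided by the user page kind below. */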
3760 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3761 {
3762 Assert(!pUserPage->pvPageR3);
3763 return;
3764 }
3765 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3766
3767
3768 /* Safety precaution in case we change the paging for other modes too in the future. */
3769 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3770
3771#ifdef VBOX_STRICT
3772 /*
3773 * Some sanity checks.
3774 */
3775 switch (pUserPage->enmKind)
3776 {
3777 case PGMPOOLKIND_32BIT_PD:
3778 case PGMPOOLKIND_32BIT_PD_PHYS:
3779 Assert(iUserTable < X86_PG_ENTRIES);
3780 break;
3781 case PGMPOOLKIND_PAE_PDPT:
3782 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3783 case PGMPOOLKIND_PAE_PDPT_PHYS:
3784 Assert(iUserTable < 4);
3785 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3786 break;
3787 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3788 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3789 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3790 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3791 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3792 case PGMPOOLKIND_PAE_PD_PHYS:
3793 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3794 break;
3795 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3796 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3797#ifndef PGM_WITHOUT_MAPPINGS
3798 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3799#endif
3800 break;
3801 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3802 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3803 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3804 break;
3805 case PGMPOOLKIND_64BIT_PML4:
3806 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3807 /* GCPhys >> PAGE_SHIFT is the index here */
3808 break;
3809 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3810 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3811 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3812 break;
3813
3814 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3815 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3816 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3817 break;
3818
3819 case PGMPOOLKIND_ROOT_NESTED:
3820 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3821 break;
3822
3823 default:
3824 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3825 break;
3826 }
3827#endif /* VBOX_STRICT */
3828
3829 /*
3830 * Clear the entry in the user page.
3831 */
3832 switch (pUserPage->enmKind)
3833 {
3834 /* 32-bit entries */
3835 case PGMPOOLKIND_32BIT_PD:
3836 case PGMPOOLKIND_32BIT_PD_PHYS:
3837 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3838 break;
3839
3840 /* 64-bit entries */
3841 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3842 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3843 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3844 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3845 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3846 case PGMPOOLKIND_PAE_PD_PHYS:
3847 case PGMPOOLKIND_PAE_PDPT_PHYS:
3848 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3849 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3850 case PGMPOOLKIND_64BIT_PML4:
3851 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3852 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3853 case PGMPOOLKIND_PAE_PDPT:
3854 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3855 case PGMPOOLKIND_ROOT_NESTED:
3856 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3857 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3858 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3859 break;
3860
3861 default:
3862 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3863 }
3864 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3865}
3866
3867
3868/**
3869 * Clears all users of a page.
3870 */
3871static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3872{
3873 /*
3874 * Free all the user records.
3875 */
3876 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3877
3878 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3879 uint16_t i = pPage->iUserHead;
3880 while (i != NIL_PGMPOOL_USER_INDEX)
3881 {
3882 /* Clear the entry in the user table. */
3883 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3884
3885 /* Free it. */
3886 const uint16_t iNext = paUsers[i].iNext;
3887 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3888 paUsers[i].iNext = pPool->iUserFreeHead;
3889 pPool->iUserFreeHead = i;
3890
3891 /* Next. */
3892 i = iNext;
3893 }
3894 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3895}
3896
3897
3898/**
3899 * Allocates a new physical cross reference extent.
3900 *
3901 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3902 * @param pVM The cross context VM structure.
3903 * @param piPhysExt Where to store the phys ext index.
3904 */
3905PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3906{
3907 PGM_LOCK_ASSERT_OWNER(pVM);
3908 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3909 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3910 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3911 {
3912 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3913 return NULL;
3914 }
3915 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3916 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3917 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3918 *piPhysExt = iPhysExt;
3919 return pPhysExt;
3920}
3921
3922
3923/**
3924 * Frees a physical cross reference extent.
3925 *
3926 * @param pVM The cross context VM structure.
3927 * @param iPhysExt The extent to free.
3928 */
3929void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3930{
3931 PGM_LOCK_ASSERT_OWNER(pVM);
3932 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3933 Assert(iPhysExt < pPool->cMaxPhysExts);
3934 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3935 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3936 {
3937 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3938 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3939 }
3940 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3941 pPool->iPhysExtFreeHead = iPhysExt;
3942}
3943
3944
3945/**
3946 * Frees a list of physical cross reference extents.
3947 *
3948 * @param pVM The cross context VM structure.
3949 * @param iPhysExt The index of the first extent in the list to free.
3950 */
3951void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3952{
3953 PGM_LOCK_ASSERT_OWNER(pVM);
3954 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3955
3956 const uint16_t iPhysExtStart = iPhysExt;
3957 PPGMPOOLPHYSEXT pPhysExt;
3958 do
3959 {
3960 Assert(iPhysExt < pPool->cMaxPhysExts);
3961 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3962 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3963 {
3964 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3965 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3966 }
3967
3968 /* next */
3969 iPhysExt = pPhysExt->iNext;
3970 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3971
3972 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3973 pPool->iPhysExtFreeHead = iPhysExtStart;
3974}
3975
3976
3977/**
3978 * Insert a reference into a list of physical cross reference extents.
3979 *
3980 * @returns The new tracking data for PGMPAGE.
3981 *
3982 * @param pVM The cross context VM structure.
3983 * @param iPhysExt The physical extent index of the list head.
3984 * @param iShwPT The shadow page table index.
3985 * @param iPte Page table entry
3986 *
3987 */
3988static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3989{
3990 PGM_LOCK_ASSERT_OWNER(pVM);
3991 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3992 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3993
3994 /*
3995 * Special common cases.
3996 */
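/* Slot 0 of the head extent is filled when the extent list is created, so probe the two remaining slots before walking the whole chain. */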
3997 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3998 {
3999 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4000 paPhysExts[iPhysExt].apte[1] = iPte;
4001 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4002 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4003 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4004 }
4005 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4006 {
4007 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4008 paPhysExts[iPhysExt].apte[2] = iPte;
4009 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4010 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4011 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4012 }
4013 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4014
4015 /*
4016 * General treatment.
4017 */
4018 const uint16_t iPhysExtStart = iPhysExt;
4019 unsigned cMax = 15;
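/* Walk at most cMax extents looking for a free slot; beyond that, give up, free the list and mark the page as overflowed so flushing falls back to the slow scan. */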
4020 for (;;)
4021 {
4022 Assert(iPhysExt < pPool->cMaxPhysExts);
4023 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4024 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4025 {
4026 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4027 paPhysExts[iPhysExt].apte[i] = iPte;
4028 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4029 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4030 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4031 }
4032 if (!--cMax)
4033 {
4034 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4035 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4036 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4037 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4038 }
4039
4040 /* advance */
4041 iPhysExt = paPhysExts[iPhysExt].iNext;
4042 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4043 break;
4044 }
4045
4046 /*
4047 * Add another extent to the list.
4048 */
4049 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4050 if (!pNew)
4051 {
4052 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4053 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4054 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4055 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4056 }
4057 pNew->iNext = iPhysExtStart;
4058 pNew->aidx[0] = iShwPT;
4059 pNew->apte[0] = iPte;
4060 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4061 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4062}
4063
4064
4065/**
4066 * Add a reference to a guest physical page where extents are in use.
4067 *
4068 * @returns The new tracking data for PGMPAGE.
4069 *
4070 * @param pVM The cross context VM structure.
4071 * @param pPhysPage Pointer to the aPages entry in the ram range.
4072 * @param u16 The ram range flags (top 16-bits).
4073 * @param iShwPT The shadow page table index.
4074 * @param iPte Page table entry
4075 */
4076uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4077{
4078 pgmLock(pVM);
4079 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4080 {
4081 /*
4082 * Convert to extent list.
4083 */
4084 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4085 uint16_t iPhysExt;
4086 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4087 if (pPhysExt)
4088 {
4089 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4090 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4091 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4092 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4093 pPhysExt->aidx[1] = iShwPT;
4094 pPhysExt->apte[1] = iPte;
4095 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4096 }
4097 else
4098 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4099 }
4100 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4101 {
4102 /*
4103 * Insert into the extent list.
4104 */
4105 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4106 }
4107 else
4108 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4109 pgmUnlock(pVM);
4110 return u16;
4111}
4112
4113
4114/**
4115 * Clear references to guest physical memory.
4116 *
4117 * @param pPool The pool.
4118 * @param pPage The page.
4119 * @param pPhysPage Pointer to the aPages entry in the ram range.
4120 * @param iPte Shadow PTE index
4121 */
4122void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4123{
4124 PVMCC pVM = pPool->CTX_SUFF(pVM);
4125 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4126 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4127
4128 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4129 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4130 {
4131 pgmLock(pVM);
4132
4133 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4134 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
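/* Walk the extent chain, remembering the previous node so an extent that becomes empty can be unlinked and freed. */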
4135 do
4136 {
4137 Assert(iPhysExt < pPool->cMaxPhysExts);
4138
4139 /*
4140 * Look for the shadow page and check if it's all freed.
4141 */
4142 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4143 {
4144 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4145 && paPhysExts[iPhysExt].apte[i] == iPte)
4146 {
4147 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4148 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4149
4150 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4151 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4152 {
4153 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4154 pgmUnlock(pVM);
4155 return;
4156 }
4157
4158 /* we can free the node. */
4159 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4160 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4161 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4162 {
4163 /* lonely node */
4164 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4165 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4166 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4167 }
4168 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4169 {
4170 /* head */
4171 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4172 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4173 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4174 }
4175 else
4176 {
4177 /* in list */
4178 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4179 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4180 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4181 }
4182 iPhysExt = iPhysExtNext;
4183 pgmUnlock(pVM);
4184 return;
4185 }
4186 }
4187
4188 /* next */
4189 iPhysExtPrev = iPhysExt;
4190 iPhysExt = paPhysExts[iPhysExt].iNext;
4191 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4192
4193 pgmUnlock(pVM);
4194 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4195 }
4196 else /* nothing to do */
4197 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4198}
4199
4200/**
4201 * Clear references to guest physical memory.
4202 *
4203 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4204 * physical address is assumed to be correct, so the linear search can be
4205 * skipped and we can assert at an earlier point.
4206 *
4207 * @param pPool The pool.
4208 * @param pPage The page.
4209 * @param HCPhys The host physical address corresponding to the guest page.
4210 * @param GCPhys The guest physical address corresponding to HCPhys.
4211 * @param iPte Shadow PTE index
4212 */
4213static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4214{
4215 /*
4216 * Lookup the page and check if it checks out before derefing it.
4217 */
4218 PVMCC pVM = pPool->CTX_SUFF(pVM);
4219 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4220 if (pPhysPage)
4221 {
4222 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4223#ifdef LOG_ENABLED
4224 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4225 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4226#endif
4227 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4228 {
4229 Assert(pPage->cPresent);
4230 Assert(pPool->cPresent);
4231 pPage->cPresent--;
4232 pPool->cPresent--;
4233 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4234 return;
4235 }
4236
4237 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4238 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4239 }
4240 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4241}
4242
4243
4244/**
4245 * Clear references to guest physical memory.
4246 *
4247 * @param pPool The pool.
4248 * @param pPage The page.
4249 * @param HCPhys The host physical address corresponding to the guest page.
4250 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4251 * @param iPte Shadow pte index
4252 */
4253void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4254{
4255 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4256
4257 /*
4258 * Try the hint first.
4259 */
4260 RTHCPHYS HCPhysHinted;
4261 PVMCC pVM = pPool->CTX_SUFF(pVM);
4262 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4263 if (pPhysPage)
4264 {
4265 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4266 Assert(HCPhysHinted);
4267 if (HCPhysHinted == HCPhys)
4268 {
4269 Assert(pPage->cPresent);
4270 Assert(pPool->cPresent);
4271 pPage->cPresent--;
4272 pPool->cPresent--;
4273 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4274 return;
4275 }
4276 }
4277 else
4278 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
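/* Poison value; it only ends up in the failure assertion message below. */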
4279
4280 /*
4281 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4282 */
4283 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4284 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4285 while (pRam)
4286 {
4287 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4288 while (iPage-- > 0)
4289 {
4290 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4291 {
4292 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4293 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4294 Assert(pPage->cPresent);
4295 Assert(pPool->cPresent);
4296 pPage->cPresent--;
4297 pPool->cPresent--;
4298 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4299 return;
4300 }
4301 }
4302 pRam = pRam->CTX_SUFF(pNext);
4303 }
4304
4305 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4306}
4307
4308
4309/**
4310 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4311 *
4312 * @param pPool The pool.
4313 * @param pPage The page.
4314 * @param pShwPT The shadow page table (mapping of the page).
4315 * @param pGstPT The guest page table.
4316 */
4317DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4318{
4319 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4320 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4321 {
4322 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4323 if (pShwPT->a[i].n.u1Present)
4324 {
4325 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4326 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4327 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4328 if (!pPage->cPresent)
4329 break;
4330 }
4331 }
4332}
4333
4334
4335/**
4336 * Clear references to guest physical memory in a PAE / 32-bit page table.
4337 *
4338 * @param pPool The pool.
4339 * @param pPage The page.
4340 * @param pShwPT The shadow page table (mapping of the page).
4341 * @param pGstPT The guest page table (just a half one).
4342 */
4343DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4344{
4345 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4346 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4347 {
4348 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4349 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4350 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4351 {
4352 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4353 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4354 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4355 if (!pPage->cPresent)
4356 break;
4357 }
4358 }
4359}
4360
4361
4362/**
4363 * Clear references to guest physical memory in a PAE / PAE page table.
4364 *
4365 * @param pPool The pool.
4366 * @param pPage The page.
4367 * @param pShwPT The shadow page table (mapping of the page).
4368 * @param pGstPT The guest page table.
4369 */
4370DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4371{
4372 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4373 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4374 {
4375 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4376 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4377 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4378 {
4379 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4380 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4381 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4382 if (!pPage->cPresent)
4383 break;
4384 }
4385 }
4386}
4387
4388
4389/**
4390 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4391 *
4392 * @param pPool The pool.
4393 * @param pPage The page.
4394 * @param pShwPT The shadow page table (mapping of the page).
4395 */
4396DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4397{
4398 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4399 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4400 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4401 {
4402 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4403 if (pShwPT->a[i].n.u1Present)
4404 {
4405 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4406 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4407 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4408 if (!pPage->cPresent)
4409 break;
4410 }
4411 }
4412}
4413
4414
4415/**
4416 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4417 *
4418 * @param pPool The pool.
4419 * @param pPage The page.
4420 * @param pShwPT The shadow page table (mapping of the page).
4421 */
4422DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4423{
4424 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4425 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4426 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4427 {
4428 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4429 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4430 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4431 {
4432 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4433 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4434 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4435 if (!pPage->cPresent)
4436 break;
4437 }
4438 }
4439}
4440
4441
4442/**
4443 * Clear references to shadowed pages in an EPT page table.
4444 *
4445 * @param pPool The pool.
4446 * @param pPage The page.
4447 * @param pShwPT The shadow page table (mapping of the
4448 * page).
4449 */
4450DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4451{
4452 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4453 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4454 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4455 {
4456 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4457 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4458 if (uPte & EPT_E_READ)
4459 {
4460 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4461 i, uPte & EPT_PTE_PG_MASK, pPage->GCPhys));
4462 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4463 if (!pPage->cPresent)
4464 break;
4465 }
4466 }
4467}
4468
4469
4470/**
4471 * Clear references to shadowed pages in a 32-bit page directory.
4472 *
4473 * @param pPool The pool.
4474 * @param pPage The page.
4475 * @param pShwPD The shadow page directory (mapping of the page).
4476 */
4477DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4478{
4479 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4480 {
4481 X86PGUINT const uPde = pShwPD->a[i].u;
4482#ifndef PGM_WITHOUT_MAPPINGS
4483 if ((uPde & (X86_PDE_P | PGM_PDFLAGS_MAPPING)) == X86_PDE_P)
4484#else
4485 if (uPde & X86_PDE_P)
4486#endif
4487 {
4488 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4489 if (pSubPage)
4490 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4491 else
4492 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4493 }
4494 }
4495}
4496
4497
4498/**
4499 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4500 *
4501 * @param pPool The pool.
4502 * @param pPage The page.
4503 * @param pShwPD The shadow page directory (mapping of the page).
4504 */
4505DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4506{
4507 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4508 {
4509 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4510#ifndef PGM_WITHOUT_MAPPINGS
4511 if ((uPde & (X86_PDE_P | PGM_PDFLAGS_MAPPING)) == X86_PDE_P)
4512#else
4513 if (uPde & X86_PDE_P)
4514#endif
4515 {
4516#ifdef PGM_WITH_LARGE_PAGES
4517 if (uPde & X86_PDE_PS)
4518 {
4519 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4520 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4521 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4522 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4523 i);
4524 }
4525 else
4526#endif
4527 {
4528 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4529 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4530 if (pSubPage)
4531 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4532 else
4533 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4534 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4535 }
4536 }
4537 }
4538}
4539
4540
4541/**
4542 * Clear references to shadowed pages in a PAE page directory pointer table.
4543 *
4544 * @param pPool The pool.
4545 * @param pPage The page.
4546 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4547 */
4548DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4549{
4550 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4551 {
4552 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4553 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4554 if ( uPdpe & X86_PDPE_P
4555#ifndef PGM_WITHOUT_MAPPINGS
4556 && !(uPdpe & PGM_PLXFLAGS_MAPPING)
4557#endif
4558 )
4559 {
4560 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4561 if (pSubPage)
4562 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4563 else
4564 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4565 }
4566 }
4567}
4568
4569
4570/**
4571 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4572 *
4573 * @param pPool The pool.
4574 * @param pPage The page.
4575 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4576 */
4577DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4578{
4579 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4580 {
4581 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4582 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4583 if (uPdpe & X86_PDPE_P)
4584 {
4585 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4586 if (pSubPage)
4587 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4588 else
4589 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4590 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4591 }
4592 }
4593}
4594
4595
4596/**
4597 * Clear references to shadowed pages in a 64-bit level 4 page table.
4598 *
4599 * @param pPool The pool.
4600 * @param pPage The page.
4601 * @param pShwPML4 The shadow level 4 page map table (mapping of the page).
4602 */
4603DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4604{
4605 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4606 {
4607 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4608 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4609 if (uPml4e & X86_PML4E_P)
4610 {
4611 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4612 if (pSubPage)
4613 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4614 else
4615 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4616 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4617 }
4618 }
4619}
4620
4621
4622/**
4623 * Clear references to shadowed pages in an EPT page directory.
4624 *
4625 * @param pPool The pool.
4626 * @param pPage The page.
4627 * @param pShwPD The shadow page directory (mapping of the page).
4628 */
4629DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4630{
4631 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4632 {
4633 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4634 Assert((uPde & UINT64_C(0xfff0000000000f80)) == 0);
4635 if (uPde & EPT_E_READ)
4636 {
4637#ifdef PGM_WITH_LARGE_PAGES
4638 if (uPde & EPT_E_LEAF)
4639 {
4640 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4641 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4642 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4643 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4644 i);
4645 }
4646 else
4647#endif
4648 {
4649 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4650 if (pSubPage)
4651 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4652 else
4653 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4654 }
4655 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4656 }
4657 }
4658}
4659
4660
4661/**
4662 * Clear references to shadowed pages in an EPT page directory pointer table.
4663 *
4664 * @param pPool The pool.
4665 * @param pPage The page.
4666 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4667 */
4668DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4669{
4670 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4671 {
4672 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4673 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
4674 if (uPdpe & EPT_E_READ)
4675 {
4676 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
4677 if (pSubPage)
4678 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4679 else
4680 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
4681 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4682 }
4683 }
4684}
4685
4686
4687/**
4688 * Clears all references made by this page.
4689 *
4690 * This includes other shadow pages and GC physical addresses.
4691 *
4692 * @param pPool The pool.
4693 * @param pPage The page.
4694 */
4695static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4696{
4697 /*
4698 * Map the shadow page and take action according to the page kind.
4699 */
4700 PVMCC pVM = pPool->CTX_SUFF(pVM);
4701 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4702 switch (pPage->enmKind)
4703 {
4704 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4705 {
4706 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4707 void *pvGst;
4708 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4709 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4710 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4711 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4712 break;
4713 }
4714
4715 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4716 {
4717 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4718 void *pvGst;
4719 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4720 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4721 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4722 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4723 break;
4724 }
4725
4726 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4727 {
4728 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4729 void *pvGst;
4730 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4731 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4732 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4733 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4734 break;
4735 }
4736
4737 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4738 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4739 {
4740 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4741 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4742 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4743 break;
4744 }
4745
4746 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4747 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4748 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4749 {
4750 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4751 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4752 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4753 break;
4754 }
4755
4756 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4757 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4758 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4759 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4760 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4761 case PGMPOOLKIND_PAE_PD_PHYS:
4762 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4763 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4764 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4765 break;
4766
4767 case PGMPOOLKIND_32BIT_PD_PHYS:
4768 case PGMPOOLKIND_32BIT_PD:
4769 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4770 break;
4771
4772 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4773 case PGMPOOLKIND_PAE_PDPT:
4774 case PGMPOOLKIND_PAE_PDPT_PHYS:
4775 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4776 break;
4777
4778 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4779 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4780 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4781 break;
4782
4783 case PGMPOOLKIND_64BIT_PML4:
4784 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4785 break;
4786
4787 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4788 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4789 break;
4790
4791 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4792 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4793 break;
4794
4795 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4796 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4797 break;
4798
4799 default:
4800 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4801 }
4802
4803 /* Paranoia: clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4804 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4805 ASMMemZeroPage(pvShw);
4806 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4807 pPage->fZeroed = true;
4808 Assert(!pPage->cPresent);
4809 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4810}
4811
4812
4813/**
4814 * Flushes a pool page.
4815 *
4816 * This moves the page to the free list after removing all user references to it.
4817 *
4818 * @returns VBox status code.
4819 * @retval VINF_SUCCESS on success.
4820 * @param pPool The pool.
4821 * @param pPage The shadow page.
4822 * @param   fFlush      Flush the TLBs when required (should only be false in very specific use cases!!)
4823 */
4824int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4825{
4826 PVMCC pVM = pPool->CTX_SUFF(pVM);
4827 bool fFlushRequired = false;
4828
4829 int rc = VINF_SUCCESS;
4830 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4831 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4832 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4833
4834 /*
4835 * Reject any attempts at flushing any of the special root pages (shall
4836 * not happen).
4837 */
4838 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4839 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4840 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4841 VINF_SUCCESS);
4842
4843 pgmLock(pVM);
4844
4845 /*
4846 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4847 */
4848 if (pgmPoolIsPageLocked(pPage))
4849 {
4850 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4851 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4852 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4853 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4854 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4855 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4856 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4857 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4858 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4859 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4860                  ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d)\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4861 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4862 pgmUnlock(pVM);
4863 return VINF_SUCCESS;
4864 }
4865
4866#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4867 /* Start a subset so we won't run out of mapping space. */
4868 PVMCPU pVCpu = VMMGetCpu(pVM);
4869 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4870#endif
4871
4872 /*
4873 * Mark the page as being in need of an ASMMemZeroPage().
4874 */
4875 pPage->fZeroed = false;
4876
4877#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4878 if (pPage->fDirty)
4879 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4880#endif
4881
4882    /* If there are any users of this table, then we *must* issue a TLB flush on all VCPUs. */
4883 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4884 fFlushRequired = true;
4885
4886 /*
4887 * Clear the page.
4888 */
4889 pgmPoolTrackClearPageUsers(pPool, pPage);
4890 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4891 pgmPoolTrackDeref(pPool, pPage);
4892 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4893
4894 /*
4895 * Flush it from the cache.
4896 */
4897 pgmPoolCacheFlushPage(pPool, pPage);
4898
4899#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4900 /* Heavy stuff done. */
4901 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4902#endif
4903
4904 /*
4905     * Deregister the monitoring.
4906 */
4907 if (pPage->fMonitored)
4908 rc = pgmPoolMonitorFlush(pPool, pPage);
4909
4910 /*
4911 * Free the page.
4912 */
4913 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4914 pPage->iNext = pPool->iFreeHead;
4915 pPool->iFreeHead = pPage->idx;
4916 pPage->enmKind = PGMPOOLKIND_FREE;
4917 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4918 pPage->GCPhys = NIL_RTGCPHYS;
4919 pPage->fReusedFlushPending = false;
4920
4921 pPool->cUsedPages--;
4922
4923 /* Flush the TLBs of all VCPUs if required. */
4924 if ( fFlushRequired
4925 && fFlush)
4926 {
4927 PGM_INVL_ALL_VCPU_TLBS(pVM);
4928 }
4929
4930 pgmUnlock(pVM);
4931 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4932 return rc;
4933}
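
/*
 * Illustrative sketch (not part of the original file): how a caller might flush a
 * shadow page it has located by host physical address.  'HCPhysShw' is a hypothetical
 * input and must refer to an in-use shadow page, since pgmPoolGetPage() asserts that.
 * The PGM lock is taken here because pgmPoolGetPage() asserts ownership; pgmPoolFlushPage()
 * nests the lock, just like pgmPoolFreeByPage() below does.
 */
#if 0 /* example only */
static int exampleFlushShadowPageByHCPhys(PVMCC pVM, RTHCPHYS HCPhysShw)
{
    PPGMPOOL     pPool = pVM->pgm.s.CTX_SUFF(pPool);
    pgmLock(pVM);
    PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhysShw);
    /* fFlush=true lets pgmPoolFlushPage() invalidate all VCPU TLBs if the page had users. */
    int rc = pgmPoolFlushPage(pPool, pPage, true /*fFlush*/);
    pgmUnlock(pVM);
    return rc;
}
#endif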
4934
4935
4936/**
4937 * Frees a usage of a pool page.
4938 *
4939 * The caller is responsible for updating the user table so that it no longer
4940 * references the shadow page.
4941 *
4942 * @param pPool The pool.
4943 * @param pPage The shadow page.
4944 * @param iUser The shadow page pool index of the user table.
4945 * NIL_PGMPOOL_IDX for root pages.
4946 * @param iUserTable The index into the user table (shadowed). Ignored if
4947 * root page.
4948 */
4949void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4950{
4951 PVMCC pVM = pPool->CTX_SUFF(pVM);
4952
4953 STAM_PROFILE_START(&pPool->StatFree, a);
4954 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4955 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4956 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
4957
4958 pgmLock(pVM);
4959 if (iUser != NIL_PGMPOOL_IDX)
4960 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4961 if (!pPage->fCached)
4962 pgmPoolFlushPage(pPool, pPage);
4963 pgmUnlock(pVM);
4964 STAM_PROFILE_STOP(&pPool->StatFree, a);
4965}
4966
4967
4968/**
4969 * Makes one or more free pages available.
4970 *
4971 * @returns VBox status code.
4972 * @retval VINF_SUCCESS on success.
4973 *
4974 * @param pPool The pool.
4975 * @param   enmKind     Page table kind.
4976 * @param iUser The user of the page.
4977 */
4978static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4979{
4980 PVMCC pVM = pPool->CTX_SUFF(pVM);
4981 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
4982 NOREF(enmKind);
4983
4984 /*
4985     * If the pool isn't fully grown yet, expand it.
4986 */
4987 if (pPool->cCurPages < pPool->cMaxPages)
4988 {
4989 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4990#ifdef IN_RING3
4991 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
4992#else
4993 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4994#endif
4995 if (RT_FAILURE(rc))
4996 return rc;
4997 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4998 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4999 return VINF_SUCCESS;
5000 }
5001
5002 /*
5003 * Free one cached page.
5004 */
5005 return pgmPoolCacheFreeOne(pPool, iUser);
5006}
5007
5008
5009/**
5010 * Allocates a page from the pool.
5011 *
5012 * This page may actually be a cached page and not in need of any processing
5013 * on the caller's part.
5014 *
5015 * @returns VBox status code.
5016 * @retval VINF_SUCCESS if a NEW page was allocated.
5017 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5018 *
5019 * @param pVM The cross context VM structure.
5020 * @param   GCPhys      The GC physical address of the page we're going to shadow.
5021 * For 4MB and 2MB PD entries, it's the first address the
5022 * shadow PT is covering.
5023 * @param enmKind The kind of mapping.
5024 * @param enmAccess Access type for the mapping (only relevant for big pages)
5025 * @param fA20Enabled Whether the A20 gate is enabled or not.
5026 * @param iUser The shadow page pool index of the user table. Root
5027 * pages should pass NIL_PGMPOOL_IDX.
5028 * @param iUserTable The index into the user table (shadowed). Ignored for
5029 * root pages (iUser == NIL_PGMPOOL_IDX).
5030 * @param fLockPage Lock the page
5031 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5032 */
5033int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5034 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5035{
5036 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5037 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5038 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5039 *ppPage = NULL;
5040 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5041 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5042 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5043
5044 pgmLock(pVM);
5045
5046 if (pPool->fCacheEnabled)
5047 {
5048 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5049 if (RT_SUCCESS(rc2))
5050 {
5051 if (fLockPage)
5052 pgmPoolLockPage(pPool, *ppPage);
5053 pgmUnlock(pVM);
5054 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5055 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5056 return rc2;
5057 }
5058 }
5059
5060 /*
5061 * Allocate a new one.
5062 */
5063 int rc = VINF_SUCCESS;
5064 uint16_t iNew = pPool->iFreeHead;
5065 if (iNew == NIL_PGMPOOL_IDX)
5066 {
5067 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5068 if (RT_FAILURE(rc))
5069 {
5070 pgmUnlock(pVM);
5071 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5072 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5073 return rc;
5074 }
5075 iNew = pPool->iFreeHead;
5076 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5077 }
5078
5079 /* unlink the free head */
5080 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5081 pPool->iFreeHead = pPage->iNext;
5082 pPage->iNext = NIL_PGMPOOL_IDX;
5083
5084 /*
5085 * Initialize it.
5086 */
5087 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5088 pPage->enmKind = enmKind;
5089 pPage->enmAccess = enmAccess;
5090 pPage->GCPhys = GCPhys;
5091 pPage->fA20Enabled = fA20Enabled;
5092 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5093 pPage->fMonitored = false;
5094 pPage->fCached = false;
5095 pPage->fDirty = false;
5096 pPage->fReusedFlushPending = false;
5097 pPage->cModifications = 0;
5098 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5099 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5100 pPage->cPresent = 0;
5101 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5102 pPage->idxDirtyEntry = 0;
5103 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5104 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5105 pPage->cLastAccessHandler = 0;
5106 pPage->cLocked = 0;
5107# ifdef VBOX_STRICT
5108 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5109# endif
5110
5111 /*
5112 * Insert into the tracking and cache. If this fails, free the page.
5113 */
5114 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5115 if (RT_FAILURE(rc3))
5116 {
5117 pPool->cUsedPages--;
5118 pPage->enmKind = PGMPOOLKIND_FREE;
5119 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5120 pPage->GCPhys = NIL_RTGCPHYS;
5121 pPage->iNext = pPool->iFreeHead;
5122 pPool->iFreeHead = pPage->idx;
5123 pgmUnlock(pVM);
5124 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5125 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5126 return rc3;
5127 }
5128
5129 /*
5130 * Commit the allocation, clear the page and return.
5131 */
5132#ifdef VBOX_WITH_STATISTICS
5133 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5134 pPool->cUsedPagesHigh = pPool->cUsedPages;
5135#endif
5136
5137 if (!pPage->fZeroed)
5138 {
5139 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5140 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5141 ASMMemZeroPage(pv);
5142 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5143 }
5144
5145 *ppPage = pPage;
5146 if (fLockPage)
5147 pgmPoolLockPage(pPool, pPage);
5148 pgmUnlock(pVM);
5149 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5150 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5151 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5152 return rc;
5153}
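
/*
 * Illustrative sketch (not part of the original file): allocating a shadow PAE page
 * table for a guest PAE page table and telling a fresh page apart from a cached one.
 * 'GCPhysGstPT', 'iUserIdx' and 'iPdeIdx' are hypothetical caller supplied values
 * (the pool index of the user table and the PDE slot referencing the new page); the
 * real callers also own the PGM lock so the returned page stays valid while in use.
 */
#if 0 /* example only */
static int exampleShadowPaePT(PVMCC pVM, RTGCPHYS GCPhysGstPT, uint16_t iUserIdx, uint32_t iPdeIdx)
{
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAlloc(pVM, GCPhysGstPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
                          true /*fA20Enabled*/, iUserIdx, iPdeIdx, false /*fLockPage*/, &pShwPage);
    if (RT_SUCCESS(rc))
    {
        if (rc == VINF_PGM_CACHED_PAGE)
        {
            /* The returned page already carries valid entries; nothing to populate. */
        }
        else
        {
            /* VINF_SUCCESS: a zeroed page; the caller fills it in (or lets page fault syncing do it). */
            void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
            NOREF(pvShw);
        }
    }
    return rc;
}
#endif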
5154
5155
5156/**
5157 * Frees a usage of a pool page.
5158 *
5159 * @param pVM The cross context VM structure.
5160 * @param HCPhys The HC physical address of the shadow page.
5161 * @param iUser The shadow page pool index of the user table.
5162 * NIL_PGMPOOL_IDX if root page.
5163 * @param iUserTable The index into the user table (shadowed). Ignored if
5164 * root page.
5165 */
5166void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5167{
5168 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5169 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5170 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5171}
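
/*
 * Illustrative sketch (not part of the original file): releasing the shadow page
 * table referenced by a shadow PAE PDE when that entry is torn down.  'pShwPd',
 * 'Pde', 'iUserIdx' and 'iPdeIdx' are hypothetical; 'iUserIdx' would be the pool
 * index of the shadow PD, and the physical address in the PDE is the HCPhys key
 * that pgmPoolFree() looks up.
 */
#if 0 /* example only */
    X86PDEPAE Pde = pShwPd->a[iPdeIdx];
    if (Pde.u & X86_PDE_P)
    {
        pgmPoolFree(pVM, Pde.u & X86_PDE_PAE_PG_MASK, iUserIdx, iPdeIdx);
        pShwPd->a[iPdeIdx].u = 0;
    }
#endif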
5172
5173
5174/**
5175 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5176 *
5177 * @returns Pointer to the shadow page structure.
5178 * @param pPool The pool.
5179 * @param HCPhys The HC physical address of the shadow page.
5180 */
5181PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5182{
5183 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5184
5185 /*
5186 * Look up the page.
5187 */
5188 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5189
5190 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5191 return pPage;
5192}
5193
5194
5195/**
5196 * Internal worker for finding a page for debugging purposes, no assertions.
5197 *
5198 * @returns Pointer to the shadow page structure. NULL if not found.
5199 * @param pPool The pool.
5200 * @param HCPhys The HC physical address of the shadow page.
5201 */
5202PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5203{
5204 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5205 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5206}
5207
5208
5209/**
5210 * Internal worker for PGM_HCPHYS_2_PTR.
5211 *
5212 * @returns VBox status code.
5213 * @param pVM The cross context VM structure.
5214 * @param HCPhys The HC physical address of the shadow page.
5215 * @param ppv Where to return the address.
5216 */
5217int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5218{
5219 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5220 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5221 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5222 VERR_PGM_POOL_GET_PAGE_FAILED);
5223 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5224 return VINF_SUCCESS;
5225}
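
/*
 * Illustrative sketch (not part of the original file): PGM_HCPHYS_2_PTR resolves to
 * this worker, and the page offset bits of the input address survive the translation.
 * 'HCPhysEntry' is a hypothetical host physical address of an entry inside a shadow page.
 */
#if 0 /* example only */
    void *pv;
    int rc = pgmPoolHCPhys2Ptr(pVM, HCPhysEntry, &pv);
    AssertRCReturn(rc, rc);
    /* 'pv' now points (HCPhysEntry & PAGE_OFFSET_MASK) bytes into the mapped shadow page. */
#endif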
5226
5227#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5228
5229/**
5230 * Flushes the specified page if present.
5231 *
5232 * @param pVM The cross context VM structure.
5233 * @param   GCPhys      Guest physical address of the page to flush.
5234 */
5235void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5236{
5237 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5238
5239 VM_ASSERT_EMT(pVM);
5240
5241 /*
5242 * Look up the GCPhys in the hash.
5243 */
5244 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5245 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5246 if (i == NIL_PGMPOOL_IDX)
5247 return;
5248
5249 do
5250 {
5251 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5252 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5253 {
5254 switch (pPage->enmKind)
5255 {
5256 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5257 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5258 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5259 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5260 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5261 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5262 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5263 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5264 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5265 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5266 case PGMPOOLKIND_64BIT_PML4:
5267 case PGMPOOLKIND_32BIT_PD:
5268 case PGMPOOLKIND_PAE_PDPT:
5269 {
5270 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5271# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5272 if (pPage->fDirty)
5273 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5274 else
5275# endif
5276 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5277 Assert(!pgmPoolIsPageLocked(pPage));
5278 pgmPoolMonitorChainFlush(pPool, pPage);
5279 return;
5280 }
5281
5282 /* ignore, no monitoring. */
5283 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5284 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5285 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5286 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5287 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5288 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5289 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5290 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5291 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5292 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5293 case PGMPOOLKIND_ROOT_NESTED:
5294 case PGMPOOLKIND_PAE_PD_PHYS:
5295 case PGMPOOLKIND_PAE_PDPT_PHYS:
5296 case PGMPOOLKIND_32BIT_PD_PHYS:
5297 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5298 break;
5299
5300 default:
5301 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5302 }
5303 }
5304
5305 /* next */
5306 i = pPage->iNext;
5307 } while (i != NIL_PGMPOOL_IDX);
5308 return;
5309}
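
/*
 * Illustrative sketch (not part of the original file): a ring-3 caller that has
 * rewritten a guest page which may double as a guest page table can use this to
 * drop any monitored shadow copies so they are re-synced on next use.  It must be
 * called on the EMT (VM_ASSERT_EMT); 'GCPhysGstPT' is a hypothetical guest address.
 */
# if 0 /* example only */
    RTGCPHYS GCPhysGstPT = UINT64_C(0x00d07000);    /* hypothetical */
    pgmPoolFlushPageByGCPhys(pVM, GCPhysGstPT);
# endif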
5310
5311
5312/**
5313 * Reset CPU on hot plugging.
5314 *
5315 * @param pVM The cross context VM structure.
5316 * @param pVCpu The cross context virtual CPU structure.
5317 */
5318void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5319{
5320 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5321
5322 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5323 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5324 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5325}
5326
5327
5328/**
5329 * Flushes the entire cache.
5330 *
5331 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5332 * this and will execute the CR3 flush.
5333 *
5334 * @param pVM The cross context VM structure.
5335 */
5336void pgmR3PoolReset(PVM pVM)
5337{
5338 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5339
5340 PGM_LOCK_ASSERT_OWNER(pVM);
5341 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5342 LogFlow(("pgmR3PoolReset:\n"));
5343
5344 /*
5345 * If there are no pages in the pool, there is nothing to do.
5346 */
5347 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5348 {
5349 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5350 return;
5351 }
5352
5353 /*
5354 * Exit the shadow mode since we're going to clear everything,
5355 * including the root page.
5356 */
5357 VMCC_FOR_EACH_VMCPU(pVM)
5358 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5359 VMCC_FOR_EACH_VMCPU_END(pVM);
5360
5361
5362 /*
5363 * Nuke the free list and reinsert all pages into it.
5364 */
5365 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5366 {
5367 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5368
5369 if (pPage->fMonitored)
5370 pgmPoolMonitorFlush(pPool, pPage);
5371 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5372 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5373 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5374 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5375 pPage->GCPhys = NIL_RTGCPHYS;
5376 pPage->enmKind = PGMPOOLKIND_FREE;
5377 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5378 Assert(pPage->idx == i);
5379 pPage->iNext = i + 1;
5380 pPage->fA20Enabled = true;
5381 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5382 pPage->fSeenNonGlobal = false;
5383 pPage->fMonitored = false;
5384 pPage->fDirty = false;
5385 pPage->fCached = false;
5386 pPage->fReusedFlushPending = false;
5387 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5388 pPage->cPresent = 0;
5389 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5390 pPage->cModifications = 0;
5391 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5392 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5393 pPage->idxDirtyEntry = 0;
5394 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5395 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5396 pPage->cLastAccessHandler = 0;
5397 pPage->cLocked = 0;
5398# ifdef VBOX_STRICT
5399 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5400# endif
5401 }
5402 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5403 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5404 pPool->cUsedPages = 0;
5405
5406 /*
5407 * Zap and reinitialize the user records.
5408 */
5409 pPool->cPresent = 0;
5410 pPool->iUserFreeHead = 0;
5411 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5412 const unsigned cMaxUsers = pPool->cMaxUsers;
5413 for (unsigned i = 0; i < cMaxUsers; i++)
5414 {
5415 paUsers[i].iNext = i + 1;
5416 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5417 paUsers[i].iUserTable = 0xfffffffe;
5418 }
5419 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5420
5421 /*
5422 * Clear all the GCPhys links and rebuild the phys ext free list.
5423 */
5424 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5425 pRam;
5426 pRam = pRam->CTX_SUFF(pNext))
5427 {
5428 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5429 while (iPage-- > 0)
5430 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5431 }
5432
5433 pPool->iPhysExtFreeHead = 0;
5434 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5435 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5436 for (unsigned i = 0; i < cMaxPhysExts; i++)
5437 {
5438 paPhysExts[i].iNext = i + 1;
5439 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5440 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5441 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5442 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5443 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5444 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5445 }
5446 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5447
5448 /*
5449 * Just zap the modified list.
5450 */
5451 pPool->cModifiedPages = 0;
5452 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5453
5454 /*
5455 * Clear the GCPhys hash and the age list.
5456 */
5457 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5458 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5459 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5460 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5461
5462# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5463 /* Clear all dirty pages. */
5464 pPool->idxFreeDirtyPage = 0;
5465 pPool->cDirtyPages = 0;
5466 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5467 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5468# endif
5469
5470 /*
5471 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5472 */
5473 VMCC_FOR_EACH_VMCPU(pVM)
5474 {
5475 /*
5476 * Re-enter the shadowing mode and assert Sync CR3 FF.
5477 */
5478 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5479 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5480 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5481 }
5482 VMCC_FOR_EACH_VMCPU_END(pVM);
5483
5484 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5485}
5486
5487#endif /* IN_RING3 */
5488
5489#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5490/**
5491 * Stringifies a PGMPOOLKIND value.
5492 */
5493static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5494{
5495 switch ((PGMPOOLKIND)enmKind)
5496 {
5497 case PGMPOOLKIND_INVALID:
5498 return "PGMPOOLKIND_INVALID";
5499 case PGMPOOLKIND_FREE:
5500 return "PGMPOOLKIND_FREE";
5501 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5502 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5503 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5504 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5505 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5506 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5507 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5508 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5509 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5510 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5511 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5512 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5513 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5514 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5515 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5516 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5517 case PGMPOOLKIND_32BIT_PD:
5518 return "PGMPOOLKIND_32BIT_PD";
5519 case PGMPOOLKIND_32BIT_PD_PHYS:
5520 return "PGMPOOLKIND_32BIT_PD_PHYS";
5521 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5522 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5523 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5524 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5525 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5526 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5527 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5528 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5529 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5530 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5531 case PGMPOOLKIND_PAE_PD_PHYS:
5532 return "PGMPOOLKIND_PAE_PD_PHYS";
5533 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5534 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5535 case PGMPOOLKIND_PAE_PDPT:
5536 return "PGMPOOLKIND_PAE_PDPT";
5537 case PGMPOOLKIND_PAE_PDPT_PHYS:
5538 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5539 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5540 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5541 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5542 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5543 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5544 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5545 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5546 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5547 case PGMPOOLKIND_64BIT_PML4:
5548 return "PGMPOOLKIND_64BIT_PML4";
5549 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5550 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5551 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5552 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5553 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5554 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5555 case PGMPOOLKIND_ROOT_NESTED:
5556 return "PGMPOOLKIND_ROOT_NESTED";
5557 }
5558 return "Unknown kind!";
5559}
5560#endif /* LOG_ENABLED || VBOX_STRICT */
5561