VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 93620

Last change on this file since 93620 was 93619, checked in by vboxsync, 3 years ago

VMM/PGMPool: Missed a couple of MMHyperCCToR3/R0 calls. bugref:10093

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 208.2 KB
1/* $Id: PGMAllPool.cpp 93619 2022-02-06 09:37:24Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
24#include <VBox/vmm/pgm.h>
25#include <VBox/vmm/mm.h>
26#include <VBox/vmm/em.h>
27#include <VBox/vmm/cpum.h>
28#include "PGMInternal.h"
29#include <VBox/vmm/vmcc.h>
30#include "PGMInline.h"
31#include <VBox/disopcode.h>
32#include <VBox/vmm/hm_vmx.h>
33
34#include <VBox/log.h>
35#include <VBox/err.h>
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/string.h>
39
40
41/*********************************************************************************************************************************
42* Internal Functions *
43*********************************************************************************************************************************/
44RT_C_DECLS_BEGIN
45#if 0 /* unused */
46DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
47DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
48#endif /* unused */
49static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
54static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
55#endif
56#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
57static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
58#endif
59
60int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
61PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
62void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
63void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);
64
65RT_C_DECLS_END
66
67
68#if 0 /* unused */
69/**
70 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
71 *
72 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
73 * @param enmKind The page kind.
74 */
75DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
76{
77 switch (enmKind)
78 {
79 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
82 return true;
83 default:
84 return false;
85 }
86}
87#endif /* unused */
88
89
90/**
91 * Flushes a chain of pages sharing the same access monitor.
92 *
93 * @param pPool The pool.
94 * @param pPage A page in the chain.
95 */
96void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
97{
98 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
99
100 /*
101 * Find the list head.
102 */
103 uint16_t idx = pPage->idx;
104 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
105 {
106 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 idx = pPage->iMonitoredPrev;
109 Assert(idx != pPage->idx);
110 pPage = &pPool->aPages[idx];
111 }
112 }
113
114 /*
115 * Iterate the list flushing each shadow page.
116 */
117 for (;;)
118 {
119 idx = pPage->iMonitoredNext;
120 Assert(idx != pPage->idx);
121 if (pPage->idx >= PGMPOOL_IDX_FIRST)
122 {
123 int rc2 = pgmPoolFlushPage(pPool, pPage);
124 AssertRC(rc2);
125 }
126 /* next */
127 if (idx == NIL_PGMPOOL_IDX)
128 break;
129 pPage = &pPool->aPages[idx];
130 }
131}
132
133
134/**
135 * Wrapper for reading the guest entry that is being modified, using the current context mapping when available (ring-3) and a guest-physical read otherwise.
136 *
137 * @returns VBox status code suitable for scheduling.
138 * @param pVM The cross context VM structure.
139 * @param pvDst Destination address
140 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
141 * on the context (e.g. \#PF in R0 & RC).
142 * @param GCPhysSrc The source guest physical address.
143 * @param cb Size of data to read
144 */
145DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
146{
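    /* Note: the source address is masked down to the start of the entry (cb is a power of two),
       presumably so that a write which only partially overlaps an entry still yields the
       complete guest entry for the caller to inspect. */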
147#if defined(IN_RING3)
148 NOREF(pVM); NOREF(GCPhysSrc);
149 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
150 return VINF_SUCCESS;
151#else
152 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
153 NOREF(pvSrc);
154 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
155#endif
156}
157
158
159/**
160 * Process shadow entries before they are changed by the guest.
161 *
162 * For PT entries we will clear them. For PD entries, we'll simply check
163 * for mapping conflicts and set the SyncCR3 FF if found.
164 *
165 * @param pVCpu The cross context virtual CPU structure.
166 * @param pPool The pool.
167 * @param pPage The head page.
168 * @param GCPhysFault The guest physical fault address.
169 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
170 * depending on the context (e.g. \#PF in R0 & RC).
171 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
172 */
173static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
174 void const *pvAddress, unsigned cbWrite)
175{
176 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
177 const unsigned off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
178 PVMCC pVM = pPool->CTX_SUFF(pVM);
179 NOREF(pVCpu);
180
181 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
182 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
183
184 for (;;)
185 {
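        /* Each iteration handles one page in the monitored chain: the union below provides a
           view of the shadow page mapping typed to match pPage->enmKind, and the switch either
           clears the affected shadow PT entries (dropping their physical page references) or
           frees the shadow pages referenced by the affected PD/PDPT/PML4 entries. */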
186 union
187 {
188 void *pv;
189 PX86PT pPT;
190 PPGMSHWPTPAE pPTPae;
191 PX86PD pPD;
192 PX86PDPAE pPDPae;
193 PX86PDPT pPDPT;
194 PX86PML4 pPML4;
195 } uShw;
196
197 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
198 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
199
200 uShw.pv = NULL;
201 switch (pPage->enmKind)
202 {
203 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
204 {
205 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
206 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
207 const unsigned iShw = off / sizeof(X86PTE);
208 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
209 X86PGUINT const uPde = uShw.pPT->a[iShw].u;
210 if (uPde & X86_PTE_P)
211 {
212 X86PTE GstPte;
213 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
214 AssertRC(rc);
215 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
216 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
217 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
218 }
219 break;
220 }
221
222 /* page/2 sized */
223 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
224 {
225 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
226 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
227 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
228 {
229 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
230 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
231 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
232 {
233 X86PTE GstPte;
234 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
235 AssertRC(rc);
236
237 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PG_MASK));
238 pgmPoolTracDerefGCPhysHint(pPool, pPage,
239 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
240 GstPte.u & X86_PTE_PG_MASK,
241 iShw);
242 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
243 }
244 }
245 break;
246 }
247
248 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
249 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
250 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
251 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
252 {
253 unsigned iGst = off / sizeof(X86PDE);
254 unsigned iShwPdpt = iGst / 256;
255 unsigned iShw = (iGst % 256) * 2;
256 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
257
258 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
259 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
260 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
261 {
262 for (unsigned i = 0; i < 2; i++)
263 {
264 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
265 if (uPde & X86_PDE_P)
266 {
267 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
268 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
269 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
270 }
271
272 /* paranoia / a bit assumptive. */
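                    /* (A misaligned guest write can spill into the following 4-byte guest
                       entry; if it does, the extra pair of shadow entries is invalidated
                       as well.) */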
273 if ( (off & 3)
274 && (off & 3) + cbWrite > 4)
275 {
276 const unsigned iShw2 = iShw + 2 + i;
277 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
278 {
279 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
280 if (uPde2 & X86_PDE_P)
281 {
282 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
283 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
284 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
285 }
286 }
287 }
288 }
289 }
290 break;
291 }
292
293 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
294 {
295 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
296 const unsigned iShw = off / sizeof(X86PTEPAE);
297 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
298 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
299 {
300 X86PTEPAE GstPte;
301 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
302 AssertRC(rc);
303
304 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
305 pgmPoolTracDerefGCPhysHint(pPool, pPage,
306 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
307 GstPte.u & X86_PTE_PAE_PG_MASK,
308 iShw);
309 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
310 }
311
312 /* paranoia / a bit assumptive. */
313 if ( (off & 7)
314 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
315 {
316 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
317 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
318
319 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
320 {
321 X86PTEPAE GstPte;
322 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
323 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
324 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
325 AssertRC(rc);
326 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
327 pgmPoolTracDerefGCPhysHint(pPool, pPage,
328 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
329 GstPte.u & X86_PTE_PAE_PG_MASK,
330 iShw2);
331 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
332 }
333 }
334 break;
335 }
336
337 case PGMPOOLKIND_32BIT_PD:
338 {
339 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
340 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
341
342 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
343 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
344 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
345 if (uPde & X86_PDE_P)
346 {
347 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
348 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
349 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
350 }
351
352 /* paranoia / a bit assumptive. */
353 if ( (off & 3)
354 && (off & 3) + cbWrite > sizeof(X86PTE))
355 {
356 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
357 if ( iShw2 != iShw
358 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
359 {
360 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
361 if (uPde2 & X86_PDE_P)
362 {
363 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
364 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
365 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
366 }
367 }
368 }
369#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
370 if ( uShw.pPD->a[iShw].n.u1Present
371 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
372 {
373 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
374 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
375 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
376 }
377#endif
378 break;
379 }
380
381 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
382 {
383 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
384 const unsigned iShw = off / sizeof(X86PDEPAE);
385 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
386
387 /*
388 * Causes trouble when the guest uses a PDE to refer to the whole page table level
389 * structure. (Invalidate here; faults later on when it tries to change the page
390 * table entries -> recheck; probably only applies to the RC case.)
391 */
392 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
393 if (uPde & X86_PDE_P)
394 {
395 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
396 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
397 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
398 }
399
400 /* paranoia / a bit assumptive. */
401 if ( (off & 7)
402 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
403 {
404 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
405 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
406
407 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
408 if (uPde2 & X86_PDE_P)
409 {
410 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
411 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
412 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
413 }
414 }
415 break;
416 }
417
418 case PGMPOOLKIND_PAE_PDPT:
419 {
420 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
421 /*
422 * Hopefully this doesn't happen very often:
423 * - touching unused parts of the page
424 * - messing with the bits of pd pointers without changing the physical address
425 */
426 /* PDPT roots are not page aligned; 32 byte only! */
427 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
428
429 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
430 const unsigned iShw = offPdpt / sizeof(X86PDPE);
431 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
432 {
433 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
434 if (uPdpe & X86_PDPE_P)
435 {
436 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
437 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
438 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
439 }
440
441 /* paranoia / a bit assumptive. */
442 if ( (offPdpt & 7)
443 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
444 {
445 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
446 if ( iShw2 != iShw
447 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
448 {
449 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
450 if (uPdpe2 & X86_PDPE_P)
451 {
452 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
453 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
454 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
455 }
456 }
457 }
458 }
459 break;
460 }
461
462 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
463 {
464 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
465 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
466 const unsigned iShw = off / sizeof(X86PDEPAE);
467 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
468 if (uPde & X86_PDE_P)
469 {
470 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
471 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
472 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
473 }
474
475 /* paranoia / a bit assumptive. */
476 if ( (off & 7)
477 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
478 {
479 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
480 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
481 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
482 if (uPde2 & X86_PDE_P)
483 {
484 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
485 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
486 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
487 }
488 }
489 break;
490 }
491
492 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
493 {
494 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
495 /*
496 * Hopefully this doesn't happen very often:
497 * - messing with the bits of pd pointers without changing the physical address
498 */
499 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
500 const unsigned iShw = off / sizeof(X86PDPE);
501 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
502 if (uPdpe & X86_PDPE_P)
503 {
504 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
505 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
506 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
507 }
508 /* paranoia / a bit assumptive. */
509 if ( (off & 7)
510 && (off & 7) + cbWrite > sizeof(X86PDPE))
511 {
512 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
513 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
514 if (uPdpe2 & X86_PDPE_P)
515 {
516 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
517 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
518 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
519 }
520 }
521 break;
522 }
523
524 case PGMPOOLKIND_64BIT_PML4:
525 {
526 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
527 /*
528 * Hopefully this doesn't happen very often:
529 * - messing with the bits of pd pointers without changing the physical address
530 */
531 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
532 const unsigned iShw = off / sizeof(X86PDPE);
533 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
534 if (uPml4e & X86_PML4E_P)
535 {
536 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
537 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
538 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
539 }
540 /* paranoia / a bit assumptive. */
541 if ( (off & 7)
542 && (off & 7) + cbWrite > sizeof(X86PDPE))
543 {
544 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
545 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
546 if (uPml4e2 & X86_PML4E_P)
547 {
548 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
549 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
550 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
551 }
552 }
553 break;
554 }
555
556 default:
557 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
558 }
559 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
560
561 /* next */
562 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
563 return;
564 pPage = &pPool->aPages[pPage->iMonitoredNext];
565 }
566}
567
568#ifndef IN_RING3
569
570/**
571 * Checks if an access could be a fork operation in progress.
572 *
573 * Meaning that the guest is setting up the parent process for Copy-On-Write.
574 *
575 * @returns true if it's likely that we're forking, otherwise false.
576 * @param pPool The pool.
577 * @param pDis The disassembled instruction.
578 * @param offFault The access offset.
579 */
580DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
581{
582 /*
583 * i386 Linux uses btr to clear X86_PTE_RW.
584 * The functions involved are (2.6.16 source inspection):
585 * clear_bit
586 * ptep_set_wrprotect
587 * copy_one_pte
588 * copy_pte_range
589 * copy_pmd_range
590 * copy_pud_range
591 * copy_page_range
592 * dup_mmap
593 * dup_mm
594 * copy_mm
595 * copy_process
596 * do_fork
597 */
598 if ( pDis->pCurInstr->uOpcode == OP_BTR
599 && !(offFault & 4)
600 /** @todo Validate that the bit index is X86_PTE_RW. */
601 )
602 {
603 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
604 return true;
605 }
606 return false;
607}
608
609
610/**
611 * Determine whether the page is likely to have been reused.
612 *
613 * @returns true if we consider the page as being reused for a different purpose.
614 * @returns false if we consider it to still be a paging page.
615 * @param pVM The cross context VM structure.
616 * @param pVCpu The cross context virtual CPU structure.
617 * @param pRegFrame Trap register frame.
618 * @param pDis The disassembly info for the faulting instruction.
619 * @param pvFault The fault address.
620 * @param pPage The pool page being accessed.
621 *
622 * @remark The REP prefix check is left to the caller because of STOSD/W.
623 */
624DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
625 PPGMPOOLPAGE pPage)
626{
627 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
628 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
629 if (pPage->cLocked)
630 {
631 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused because it's locked!\n", pvFault, pPage));
632 return false;
633 }
634
635 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
636 if ( HMHasPendingIrq(pVM)
637 && pRegFrame->rsp - pvFault < 32)
638 {
639 /* Fault caused by stack writes while trying to inject an interrupt event. */
640 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
641 return true;
642 }
643
644 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
645
646 /* Non-supervisor mode write means it's used for something else. */
647 if (CPUMGetGuestCPL(pVCpu) == 3)
648 return true;
649
650 switch (pDis->pCurInstr->uOpcode)
651 {
652 /* call implies the actual push of the return address faulted */
653 case OP_CALL:
654 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
655 return true;
656 case OP_PUSH:
657 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
658 return true;
659 case OP_PUSHF:
660 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
661 return true;
662 case OP_PUSHA:
663 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
664 return true;
665 case OP_FXSAVE:
666 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
667 return true;
668 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
669 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
670 return true;
671 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
672 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
673 return true;
674 case OP_MOVSWD:
675 case OP_STOSWD:
676 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
677 && pRegFrame->rcx >= 0x40
678 )
679 {
680 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
681
682 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
683 return true;
684 }
685 break;
686
687 default:
688 /*
689 * Anything having ESP on the left side means stack writes.
690 */
691 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
692 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
693 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
694 {
695 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
696 return true;
697 }
698 break;
699 }
700
701 /*
702 * Page table updates are very very unlikely to be crossing page boundaries,
703 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
704 */
705 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
706 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
707 {
708 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
709 return true;
710 }
711
712 /*
713 * Nobody does an unaligned 8-byte write to a page table, right?
714 */
715 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
716 {
717 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
718 return true;
719 }
720
721 return false;
722}
723
724
725/**
726 * Flushes the page being accessed.
727 *
728 * @returns VBox status code suitable for scheduling.
729 * @param pVM The cross context VM structure.
730 * @param pVCpu The cross context virtual CPU structure.
731 * @param pPool The pool.
732 * @param pPage The pool page (head).
733 * @param pDis The disassembly of the write instruction.
734 * @param pRegFrame The trap register frame.
735 * @param GCPhysFault The fault address as guest physical address.
736 * @param pvFault The fault address.
737 * @todo VBOXSTRICTRC
738 */
739static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
740 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
741{
742 NOREF(pVM); NOREF(GCPhysFault);
743
744 /*
745 * First, do the flushing.
746 */
747 pgmPoolMonitorChainFlush(pPool, pPage);
748
749 /*
750 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
751 * Must do this in raw mode (!); XP boot will fail otherwise.
752 */
753 int rc = VINF_SUCCESS;
754 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
755 if (rc2 == VINF_SUCCESS)
756 { /* do nothing */ }
757 else if (rc2 == VINF_EM_RESCHEDULE)
758 {
759 rc = VBOXSTRICTRC_VAL(rc2);
760# ifndef IN_RING3
761 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
762# endif
763 }
764 else if (rc2 == VERR_EM_INTERPRETER)
765 {
766 rc = VINF_EM_RAW_EMULATE_INSTR;
767 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
768 }
769 else if (RT_FAILURE_NP(rc2))
770 rc = VBOXSTRICTRC_VAL(rc2);
771 else
772 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
773
774 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
775 return rc;
776}
777
778
779/**
780 * Handles the STOSD write accesses.
781 *
782 * @returns VBox status code suitable for scheduling.
783 * @param pVM The cross context VM structure.
784 * @param pPool The pool.
785 * @param pPage The pool page (head).
786 * @param pDis The disassembly of the write instruction.
787 * @param pRegFrame The trap register frame.
788 * @param GCPhysFault The fault address as guest physical address.
789 * @param pvFault The fault address.
790 */
791DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
792 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
793{
794 unsigned uIncrement = pDis->Param1.cb;
795 NOREF(pVM);
796
797 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
798 Assert(pRegFrame->rcx <= 0x20);
799
800# ifdef VBOX_STRICT
801 if (pDis->uOpMode == DISCPUMODE_32BIT)
802 Assert(uIncrement == 4);
803 else
804 Assert(uIncrement == 8);
805# endif
806
807 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
808
809 /*
810 * Increment the modification counter and insert it into the list
811 * of modified pages the first time.
812 */
813 if (!pPage->cModifications++)
814 pgmPoolMonitorModifiedInsert(pPool, pPage);
815
816 /*
817 * Execute REP STOSD.
818 *
819 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
820 * write situation, meaning that it's safe to write here.
821 */
822 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
823 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
824 while (pRegFrame->rcx)
825 {
826 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
827 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
828 pu32 += uIncrement;
829 GCPhysFault += uIncrement;
830 pRegFrame->rdi += uIncrement;
831 pRegFrame->rcx--;
832 }
833 pRegFrame->rip += pDis->cbInstr;
834
835 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
836 return VINF_SUCCESS;
837}
838
839
840/**
841 * Handles the simple write accesses.
842 *
843 * @returns VBox status code suitable for scheduling.
844 * @param pVM The cross context VM structure.
845 * @param pVCpu The cross context virtual CPU structure.
846 * @param pPool The pool.
847 * @param pPage The pool page (head).
848 * @param pDis The disassembly of the write instruction.
849 * @param pRegFrame The trap register frame.
850 * @param GCPhysFault The fault address as guest physical address.
851 * @param pvFault The fault address.
852 * @param pfReused Reused state (in/out)
853 */
854DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
855 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
856{
857 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
858 NOREF(pVM);
859 NOREF(pfReused); /* initialized by caller */
860
861 /*
862 * Increment the modification counter and insert it into the list
863 * of modified pages the first time.
864 */
865 if (!pPage->cModifications++)
866 pgmPoolMonitorModifiedInsert(pPool, pPage);
867
868 /*
869 * Clear all the pages. ASSUMES that pvFault is readable.
870 */
871 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
872 if (cbWrite <= 8)
873 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
874 else if (cbWrite <= 16)
875 {
876 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
877 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
878 }
879 else
880 {
881 Assert(cbWrite <= 32);
882 for (uint32_t off = 0; off < cbWrite; off += 8)
883 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
884 }
885
886 /*
887 * Interpret the instruction.
888 */
889 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
890 if (RT_SUCCESS(rc))
891 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
892 else if (rc == VERR_EM_INTERPRETER)
893 {
894 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
895 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
896 rc = VINF_EM_RAW_EMULATE_INSTR;
897 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
898 }
899
900# if 0 /* experimental code */
901 if (rc == VINF_SUCCESS)
902 {
903 switch (pPage->enmKind)
904 {
905 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
906 {
907 X86PTEPAE GstPte;
908 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
909 AssertRC(rc);
910
911 /* Check the new value written by the guest. If present and with a bogus physical address, then
912 * it's fairly safe to assume the guest is reusing the PT.
913 */
914 if (GstPte.n.u1Present)
915 {
916 RTHCPHYS HCPhys = -1;
917 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
918 if (rc != VINF_SUCCESS)
919 {
920 *pfReused = true;
921 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
922 }
923 }
924 break;
925 }
926 }
927 }
928# endif
929
930 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
931 return VBOXSTRICTRC_VAL(rc);
932}
933
934
935/**
936 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
937 * \#PF access handler callback for page table pages.}
938 *
939 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
940 */
941DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
942 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
943{
944 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
945 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
946 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
947 unsigned cMaxModifications;
948 bool fForcedFlush = false;
949 NOREF(uErrorCode);
950
951 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
952
953 PGM_LOCK_VOID(pVM);
954 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
955 {
956 /* Pool page changed while we were waiting for the lock; ignore. */
957 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
958 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
959 PGM_UNLOCK(pVM);
960 return VINF_SUCCESS;
961 }
962# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
963 if (pPage->fDirty)
964 {
965 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
966 PGM_UNLOCK(pVM);
967 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
968 }
969# endif
970
971# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
972 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
973 {
974 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
975 void *pvGst;
976 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
977 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
978 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
979 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
980 }
981# endif
982
983 /*
984 * Disassemble the faulting instruction.
985 */
986 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
987 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
988 if (RT_UNLIKELY(rc != VINF_SUCCESS))
989 {
990 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
991 PGM_UNLOCK(pVM);
992 return rc;
993 }
994
995 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
996
997 /*
998 * We should ALWAYS have the list head as user parameter. This
999 * is because we use that page to record the changes.
1000 */
1001 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1002
1003# ifdef IN_RING0
1004 /* Maximum nr of modifications depends on the page type. */
1005 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1006 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1007 cMaxModifications = 4;
1008 else
1009 cMaxModifications = 24;
1010# else
1011 cMaxModifications = 48;
1012# endif
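    /* (In ring-0 the page-table kinds get a much smaller budget: once a guest page table takes
       more than a handful of monitored write faults it is most likely being rewritten wholesale,
       and flushing or dirty-tracking it is cheaper than intercepting every write.) */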
1013
1014 /*
1015 * Incremental page table updates should weigh more than random ones.
1016 * (Only applies when started from offset 0)
1017 */
1018 pVCpu->pgm.s.cPoolAccessHandler++;
1019 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1020 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1021 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1022 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1023 {
1024 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1025 Assert(pPage->cModifications < 32000);
1026 pPage->cModifications = pPage->cModifications * 2;
1027 pPage->GCPtrLastAccessHandlerFault = pvFault;
1028 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1029 if (pPage->cModifications >= cMaxModifications)
1030 {
1031 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1032 fForcedFlush = true;
1033 }
1034 }
1035
1036 if (pPage->cModifications >= cMaxModifications)
1037 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1038
1039 /*
1040 * Check if it's worth dealing with.
1041 */
1042 bool fReused = false;
1043 bool fNotReusedNotForking = false;
1044 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1045 || pgmPoolIsPageLocked(pPage)
1046 )
1047 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1048 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1049 {
1050 /*
1051 * Simple instructions, no REP prefix.
1052 */
1053 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1054 {
1055 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1056 if (fReused)
1057 goto flushPage;
1058
1059 /* A mov instruction to change the first page table entry will be remembered so we can detect
1060 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1061 */
1062 if ( rc == VINF_SUCCESS
1063 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1064 && pDis->pCurInstr->uOpcode == OP_MOV
1065 && (pvFault & PAGE_OFFSET_MASK) == 0)
1066 {
1067 pPage->GCPtrLastAccessHandlerFault = pvFault;
1068 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1069 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1070 /* Make sure we don't kick out a page too quickly. */
1071 if (pPage->cModifications > 8)
1072 pPage->cModifications = 2;
1073 }
1074 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1075 {
1076 /* ignore the 2nd write to this page table entry. */
1077 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1078 }
1079 else
1080 {
1081 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1082 pPage->GCPtrLastAccessHandlerRip = 0;
1083 }
1084
1085 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1086 PGM_UNLOCK(pVM);
1087 return rc;
1088 }
1089
1090 /*
1091 * Windows frequently does small memset() operations (netio test 4k+).
1092 * We have to deal with these or we'll kill the cache and performance.
1093 */
1094 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1095 && !pRegFrame->eflags.Bits.u1DF
1096 && pDis->uOpMode == pDis->uCpuMode
1097 && pDis->uAddrMode == pDis->uCpuMode)
1098 {
1099 bool fValidStosd = false;
1100
1101 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1102 && pDis->fPrefix == DISPREFIX_REP
1103 && pRegFrame->ecx <= 0x20
1104 && pRegFrame->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1105 && !((uintptr_t)pvFault & 3)
1106 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1107 )
1108 {
1109 fValidStosd = true;
1110 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1111 }
1112 else
1113 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1114 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1115 && pRegFrame->rcx <= 0x20
1116 && pRegFrame->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1117 && !((uintptr_t)pvFault & 7)
1118 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1119 )
1120 {
1121 fValidStosd = true;
1122 }
1123
1124 if (fValidStosd)
1125 {
1126 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1127 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1128 PGM_UNLOCK(pVM);
1129 return rc;
1130 }
1131 }
1132
1133 /* REP prefix, don't bother. */
1134 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1135 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1136 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1137 fNotReusedNotForking = true;
1138 }
1139
1140# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1141 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1142 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1143 */
1144 if ( pPage->cModifications >= cMaxModifications
1145 && !fForcedFlush
1146 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1147 && ( fNotReusedNotForking
1148 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1149 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1150 )
1151 )
1152 {
1153 Assert(!pgmPoolIsPageLocked(pPage));
1154 Assert(pPage->fDirty == false);
1155
1156 /* Flush any monitored duplicates as we will disable write protection. */
1157 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1158 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1159 {
1160 PPGMPOOLPAGE pPageHead = pPage;
1161
1162 /* Find the monitor head. */
1163 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1164 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1165
1166 while (pPageHead)
1167 {
1168 unsigned idxNext = pPageHead->iMonitoredNext;
1169
1170 if (pPageHead != pPage)
1171 {
1172 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1173 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1174 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1175 AssertRC(rc2);
1176 }
1177
1178 if (idxNext == NIL_PGMPOOL_IDX)
1179 break;
1180
1181 pPageHead = &pPool->aPages[idxNext];
1182 }
1183 }
1184
1185 /* The flushing above might fail for locked pages, so double check. */
1186 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1187 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1188 {
1189 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1190
1191 /* Temporarily allow write access to the page table again. */
1192 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1193 if (rc == VINF_SUCCESS)
1194 {
1195 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1196 AssertMsg(rc == VINF_SUCCESS
1197 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1198 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1199 || rc == VERR_PAGE_NOT_PRESENT,
1200 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1201# ifdef VBOX_STRICT
1202 pPage->GCPtrDirtyFault = pvFault;
1203# endif
1204
1205 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1206 PGM_UNLOCK(pVM);
1207 return rc;
1208 }
1209 }
1210 }
1211# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1212
1213 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1214flushPage:
1215 /*
1216 * Not worth it, so flush it.
1217 *
1218 * If we considered it to be reused, don't go back to ring-3
1219 * to emulate failed instructions since we usually cannot
1220 * interpret them. This may be a bit risky, in which case
1221 * the reuse detection must be fixed.
1222 */
1223 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1224 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1225 && fReused)
1226 {
1227 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1228 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1229 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1230 }
1231 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1232 PGM_UNLOCK(pVM);
1233 return rc;
1234}
1235
1236#endif /* !IN_RING3 */
1237
1238/**
1239 * @callback_method_impl{FNPGMPHYSHANDLER,
1240 * Access handler for shadowed page table pages.}
1241 *
1242 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1243 */
1244PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1245pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1246 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1247{
1248 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1249 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1250 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1251 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1252 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1253
1254 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1255
1256 PGM_LOCK_VOID(pVM);
1257
1258#ifdef VBOX_WITH_STATISTICS
1259 /*
1260 * Collect stats on the access.
1261 */
1262 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1263 if (cbBuf <= 16 && cbBuf > 0)
1264 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1265 else if (cbBuf >= 17 && cbBuf < 32)
1266 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1267 else if (cbBuf >= 32 && cbBuf < 64)
1268 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1269 else if (cbBuf >= 64)
1270 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1271
1272 uint8_t cbAlign;
1273 switch (pPage->enmKind)
1274 {
1275 default:
1276 cbAlign = 7;
1277 break;
1278 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1279 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1280 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1281 case PGMPOOLKIND_32BIT_PD:
1282 case PGMPOOLKIND_32BIT_PD_PHYS:
1283 cbAlign = 3;
1284 break;
1285 }
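    /* cbAlign is effectively an alignment mask: 3 for kinds with 4-byte entries, 7 for kinds
       with 8-byte entries; it is used below to bucket misaligned accesses. */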
1286 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1287 if ((uint8_t)GCPhys & cbAlign)
1288 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1289#endif
1290
1291 /*
1292 * Make sure the pool page wasn't modified by a different CPU.
1293 */
1294 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1295 {
1296 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1297
1298 /* The max modification count before flushing depends on the context and page type. */
1299#ifdef IN_RING3
1300 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1301#else
1302 uint16_t cMaxModifications;
1303 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1304 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1305 cMaxModifications = 4;
1306 else
1307 cMaxModifications = 24;
1308#endif
1309
1310 /*
1311 * We don't have to be very sophisticated about this since there are relatively few calls here.
1312 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1313 */
1314 if ( ( pPage->cModifications < cMaxModifications
1315 || pgmPoolIsPageLocked(pPage) )
1316 && enmOrigin != PGMACCESSORIGIN_DEVICE
1317 && cbBuf <= 16)
1318 {
1319 /* Clear the shadow entry. */
1320 if (!pPage->cModifications++)
1321 pgmPoolMonitorModifiedInsert(pPool, pPage);
1322
1323 if (cbBuf <= 8)
1324 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1325 else
1326 {
1327 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1328 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1329 }
1330 }
1331 else
1332 pgmPoolMonitorChainFlush(pPool, pPage);
1333
1334 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1335 }
1336 else
1337 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1338 PGM_UNLOCK(pVM);
1339 return VINF_PGM_HANDLER_DO_DEFAULT;
1340}
1341
1342
1343#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1344
1345# if defined(VBOX_STRICT) && !defined(IN_RING3)
1346
1347/**
1348 * Check references to guest physical memory in a PAE / PAE page table.
1349 *
1350 * @param pPool The pool.
1351 * @param pPage The page.
1352 * @param pShwPT The shadow page table (mapping of the page).
1353 * @param pGstPT The guest page table.
1354 */
1355static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1356{
1357 unsigned cErrors = 0;
1358 int LastRc = -1; /* initialized to shut up gcc */
1359 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1360 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1361 PVMCC pVM = pPool->CTX_SUFF(pVM);
1362
1363# ifdef VBOX_STRICT
1364 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1365 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1366# endif
1367 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1368 {
1369 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1370 {
1371 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1372 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1373 if ( rc != VINF_SUCCESS
1374 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1375 {
1376 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1377 LastPTE = i;
1378 LastRc = rc;
1379 LastHCPhys = HCPhys;
1380 cErrors++;
1381
1382 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1383 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1384 AssertRC(rc);
1385
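                /* Scan the whole pool for other shadow page tables that still map this guest
                   page table writable - that would explain how the entry got out of sync. */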
1386 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1387 {
1388 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1389
1390 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1391 {
1392 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1393
1394 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1395 {
1396 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1397 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1398 {
1399 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1400 }
1401 }
1402
1403 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1404 }
1405 }
1406 }
1407 }
1408 }
1409 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1410}
1411
1412
1413/**
1414 * Check references to guest physical memory in a PAE / 32-bit page table.
1415 *
1416 * @param pPool The pool.
1417 * @param pPage The page.
1418 * @param pShwPT The shadow page table (mapping of the page).
1419 * @param pGstPT The guest page table.
1420 */
1421static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1422{
1423 unsigned cErrors = 0;
1424 int LastRc = -1; /* initialized to shut up gcc */
1425 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1426 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1427 PVMCC pVM = pPool->CTX_SUFF(pVM);
1428
1429# ifdef VBOX_STRICT
1430 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1431 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1432# endif
1433 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1434 {
1435 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1436 {
1437 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1438 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1439 if ( rc != VINF_SUCCESS
1440 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1441 {
1442 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1443 LastPTE = i;
1444 LastRc = rc;
1445 LastHCPhys = HCPhys;
1446 cErrors++;
1447
1448 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1449 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1450 AssertRC(rc);
1451
1452 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1453 {
1454 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1455
1456 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1457 {
1458 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1459
1460 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1461 {
1462 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1463 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1464 {
1465 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1466 }
1467 }
1468
1469 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1470 }
1471 }
1472 }
1473 }
1474 }
1475 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1476}
1477
1478# endif /* VBOX_STRICT && !IN_RING3 */
1479
1480/**
1481 * Clear references to guest physical memory in a PAE / PAE page table.
1482 *
1483 * @returns nr of changed PTEs
1484 * @param pPool The pool.
1485 * @param pPage The page.
1486 * @param pShwPT The shadow page table (mapping of the page).
1487 * @param pGstPT The guest page table.
1488 * @param pOldGstPT The old cached guest page table.
1489 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1490 * @param pfFlush Flush reused page table (out)
1491 */
1492DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1493 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1494{
1495 unsigned cChanged = 0;
1496
1497# ifdef VBOX_STRICT
1498 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1499 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1500# endif
1501 *pfFlush = false;
1502
1503 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1504 {
1505 /* Check the new value written by the guest. If present and with a bogus physical address, then
1506 * it's fairly safe to assume the guest is reusing the PT.
1507 */
1508 if ( fAllowRemoval
1509 && (pGstPT->a[i].u & X86_PTE_P))
1510 {
1511 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1512 {
1513 *pfFlush = true;
1514 return ++cChanged;
1515 }
1516 }
1517 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1518 {
1519 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1520 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1521 {
1522# ifdef VBOX_STRICT
1523 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1524 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1525 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1526# endif
1527 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1528 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1529 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1530 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1531
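                /* The shadow entry may be more restrictive than the guest one (write access is
                   often removed for monitoring / dirty tracking), but it should never be more
                   permissive; hence the fHostRW <= fGuestRW test below. */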
1532 if ( uHostAttr == uGuestAttr
1533 && fHostRW <= fGuestRW)
1534 continue;
1535 }
1536 cChanged++;
1537 /* Something was changed, so flush it. */
1538 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1539 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1540 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1541 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1542 }
1543 }
1544 return cChanged;
1545}
1546
1547
1548/**
1549 * Clear references to guest physical memory in a PAE / 32-bit page table.
1550 *
1551 * @returns nr of changed PTEs
1552 * @param pPool The pool.
1553 * @param pPage The page.
1554 * @param pShwPT The shadow page table (mapping of the page).
1555 * @param pGstPT The guest page table.
1556 * @param pOldGstPT The old cached guest page table.
1557 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1558 * @param pfFlush Flush reused page table (out)
1559 */
1560DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1561 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1562{
1563 unsigned cChanged = 0;
1564
1565# ifdef VBOX_STRICT
1566 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1567 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1568# endif
1569 *pfFlush = false;
1570
1571 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1572 {
1573 /* Check the new value written by the guest. If present and with a bogus physical address, then
1574 * it's fairly safe to assume the guest is reusing the PT. */
1575 if (fAllowRemoval)
1576 {
1577 X86PGUINT const uPte = pGstPT->a[i].u;
1578 if ( (uPte & X86_PTE_P)
1579 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1580 {
1581 *pfFlush = true;
1582 return ++cChanged;
1583 }
1584 }
1585 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1586 {
1587 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1588 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1589 {
1590# ifdef VBOX_STRICT
1591 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1592 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1593 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1594# endif
1595 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1596 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1597 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1598 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1599
1600 if ( uHostAttr == uGuestAttr
1601 && fHostRW <= fGuestRW)
1602 continue;
1603 }
1604 cChanged++;
1605 /* Something was changed, so flush it. */
1606 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1607 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1608 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1609 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1610 }
1611 }
1612 return cChanged;
1613}
1614
1615
1616/**
1617 * Flush a dirty page.
1618 *
1619 * @param pVM The cross context VM structure.
1620 * @param pPool The pool.
1621 * @param idxSlot Dirty array slot index
1622 * @param fAllowRemoval Allow a reused page table to be removed
1623 */
1624static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1625{
1626 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1627
1628 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1629 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1630 if (idxPage == NIL_PGMPOOL_IDX)
1631 return;
1632
1633 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1634 Assert(pPage->idx == idxPage);
1635 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1636
1637 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1638 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1639
1640 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1641 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1642 Assert(rc == VINF_SUCCESS);
1643 pPage->fDirty = false;
1644
1645# ifdef VBOX_STRICT
1646 uint64_t fFlags = 0;
1647 RTHCPHYS HCPhys;
1648 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1649 AssertMsg( ( rc == VINF_SUCCESS
1650 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1651 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1652 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1653 || rc == VERR_PAGE_NOT_PRESENT,
1654 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1655# endif
1656
1657 /* Flush those PTEs that have changed. */
1658 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1659 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1660 void *pvGst;
1661 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1662 bool fFlush;
1663 unsigned cChanges;
1664
1665 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1666 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1667 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1668 else
1669 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1670 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1671
1672 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1673 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1674 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1675 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1676
1677 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1678 Assert(pPage->cModifications);
1679 if (cChanges < 4)
1680 pPage->cModifications = 1; /* must use > 0 here */
1681 else
1682 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1683
1684 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
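    /* If the dirty array was completely full, the slot we just emptied becomes the next free slot. */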
1685 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1686 pPool->idxFreeDirtyPage = idxSlot;
1687
1688 pPool->cDirtyPages--;
1689 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1690 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1691 if (fFlush)
1692 {
1693 Assert(fAllowRemoval);
1694 Log(("Flush reused page table!\n"));
1695 pgmPoolFlushPage(pPool, pPage);
1696 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1697 }
1698 else
1699 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1700}
1701
1702
1703# ifndef IN_RING3
1704/**
1705 * Add a new dirty page.
1706 *
1707 * @param pVM The cross context VM structure.
1708 * @param pPool The pool.
1709 * @param pPage The page.
1710 */
1711void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1712{
1713 PGM_LOCK_ASSERT_OWNER(pVM);
1714 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1715 Assert(!pPage->fDirty);
1716
1717 unsigned idxFree = pPool->idxFreeDirtyPage;
1718 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1719 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1720
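    /* If the array is full, flush the page currently occupying the free slot first (allowing removal of a reused page table). */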
1721 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1722 {
1723 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1724 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1725 }
1726 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1727 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1728
1729 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1730
1731 /*
1732 * Make a copy of the guest page table as we require valid GCPhys addresses
1733 * when removing references to physical pages.
1734 * (The HCPhys linear lookup is *extremely* expensive!)
1735 */
1736 void *pvGst;
1737 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
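    /* A PAE guest PT occupies a whole page; one PAE shadow PT only covers 512 guest 32-bit PTEs, so half a page suffices in that case. */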
1738 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1739 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
1740# ifdef VBOX_STRICT
1741 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1742 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1743 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1744 else
1745 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1746 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1747# endif
1748 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1749
1750 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1751 pPage->fDirty = true;
1752 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1753 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1754 pPool->cDirtyPages++;
1755
1756 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
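    /* If the next candidate slot is still occupied, search the array for a free one. */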
1757 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1758 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1759 {
1760 unsigned i;
1761 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1762 {
1763 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1764 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1765 {
1766 pPool->idxFreeDirtyPage = idxFree;
1767 break;
1768 }
1769 }
1770 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1771 }
1772
1773 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1774
1775 /*
1776 * Clear all references to this shadow table. See @bugref{7298}.
1777 */
1778 pgmPoolTrackClearPageUsers(pPool, pPage);
1779}
1780# endif /* !IN_RING3 */
1781
1782
1783/**
1784 * Check if the specified page is dirty (not write monitored)
1785 *
1786 * @returns true if the page is dirty, false if not.
1787 * @param pVM The cross context VM structure.
1788 * @param GCPhys Guest physical address
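 *
 * @par Example
 *      Illustrative sketch only (assumes the caller owns the PGM lock, as both
 *      functions assert); GCPhysGuestPT is a placeholder for the guest page
 *      table address in question:
 * @code
 *      if (pgmPoolIsDirtyPageSlow(pVM, GCPhysGuestPT))
 *          pgmPoolInvalidateDirtyPage(pVM, GCPhysGuestPT); // flush it and reinstate monitoring
 * @endcode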
1789 */
1790bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1791{
1792 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1793 PGM_LOCK_ASSERT_OWNER(pVM);
1794 if (!pPool->cDirtyPages)
1795 return false;
1796
1797 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1798
1799 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1800 {
1801 unsigned idxPage = pPool->aidxDirtyPages[i];
1802 if (idxPage != NIL_PGMPOOL_IDX)
1803 {
1804 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1805 if (pPage->GCPhys == GCPhys)
1806 return true;
1807 }
1808 }
1809 return false;
1810}
1811
1812
1813/**
1814 * Reset all dirty pages by reinstating page monitoring.
1815 *
1816 * @param pVM The cross context VM structure.
1817 */
1818void pgmPoolResetDirtyPages(PVMCC pVM)
1819{
1820 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1821 PGM_LOCK_ASSERT_OWNER(pVM);
1822 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1823
1824 if (!pPool->cDirtyPages)
1825 return;
1826
1827 Log(("pgmPoolResetDirtyPages\n"));
1828 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1829 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1830
1831 pPool->idxFreeDirtyPage = 0;
1832 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1833 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1834 {
1835 unsigned i;
1836 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1837 {
1838 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1839 {
1840 pPool->idxFreeDirtyPage = i;
1841 break;
1842 }
1843 }
1844 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1845 }
1846
1847 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1848 return;
1849}
1850
1851
1852/**
1853 * Invalidate the PT entry for the specified page
1854 *
1855 * @param pVM The cross context VM structure.
1856 * @param GCPtrPage Guest page to invalidate
1857 */
1858void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
1859{
1860 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1861 PGM_LOCK_ASSERT_OWNER(pVM);
1862 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1863
1864 if (!pPool->cDirtyPages)
1865 return;
1866
1867 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
1868 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1869 {
1870 /** @todo What was intended here??? This looks incomplete... */
1871 }
1872}
1873
1874
1875/**
1876 * Flushes the dirty page matching the given page table address, reinstating page monitoring.
1877 *
1878 * @param pVM The cross context VM structure.
1879 * @param GCPhysPT Physical address of the page table
1880 */
1881void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
1882{
1883 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1884 PGM_LOCK_ASSERT_OWNER(pVM);
1885 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1886 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1887
1888 if (!pPool->cDirtyPages)
1889 return;
1890
1891 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1892
1893 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1894 {
1895 unsigned idxPage = pPool->aidxDirtyPages[i];
1896 if (idxPage != NIL_PGMPOOL_IDX)
1897 {
1898 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1899 if (pPage->GCPhys == GCPhysPT)
1900 {
1901 idxDirtyPage = i;
1902 break;
1903 }
1904 }
1905 }
1906
1907 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1908 {
1909 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1910 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1911 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1912 {
1913 unsigned i;
1914 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1915 {
1916 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1917 {
1918 pPool->idxFreeDirtyPage = i;
1919 break;
1920 }
1921 }
1922 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1923 }
1924 }
1925}
1926
1927#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1928
1929/**
1930 * Inserts a page into the GCPhys hash table.
1931 *
1932 * @param pPool The pool.
1933 * @param pPage The page.
1934 */
1935DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1936{
1937 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1938 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1939 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1940 pPage->iNext = pPool->aiHash[iHash];
1941 pPool->aiHash[iHash] = pPage->idx;
1942}
1943
1944
1945/**
1946 * Removes a page from the GCPhys hash table.
1947 *
1948 * @param pPool The pool.
1949 * @param pPage The page.
1950 */
1951DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1952{
1953 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1954 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1955 if (pPool->aiHash[iHash] == pPage->idx)
1956 pPool->aiHash[iHash] = pPage->iNext;
1957 else
1958 {
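        /* Not the chain head: walk the hash chain to find the predecessor and unlink the page. */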
1959 uint16_t iPrev = pPool->aiHash[iHash];
1960 for (;;)
1961 {
1962 const int16_t i = pPool->aPages[iPrev].iNext;
1963 if (i == pPage->idx)
1964 {
1965 pPool->aPages[iPrev].iNext = pPage->iNext;
1966 break;
1967 }
1968 if (i == NIL_PGMPOOL_IDX)
1969 {
1970 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1971 break;
1972 }
1973 iPrev = i;
1974 }
1975 }
1976 pPage->iNext = NIL_PGMPOOL_IDX;
1977}
1978
1979
1980/**
1981 * Frees up one cache page.
1982 *
1983 * @returns VBox status code.
1984 * @retval VINF_SUCCESS on success.
1985 * @param pPool The pool.
1986 * @param iUser The user index.
1987 */
1988static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1989{
1990 const PVMCC pVM = pPool->CTX_SUFF(pVM);
1991 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
1992 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1993
1994 /*
1995 * Select one page from the tail of the age list.
1996 */
1997 PPGMPOOLPAGE pPage;
1998 for (unsigned iLoop = 0; ; iLoop++)
1999 {
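        /* Start with the least recently used page (the tail of the age list), but never pick the caller's own user page. */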
2000 uint16_t iToFree = pPool->iAgeTail;
2001 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2002 iToFree = pPool->aPages[iToFree].iAgePrev;
2003/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2004 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2005 {
2006 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2007 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2008 {
2009 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2010 continue;
2011 iToFree = i;
2012 break;
2013 }
2014 }
2015*/
2016 Assert(iToFree != iUser);
2017 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2018 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2019 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2020 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2021 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2022 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2023
2024 pPage = &pPool->aPages[iToFree];
2025
2026 /*
2027 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2028 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2029 */
2030 if ( !pgmPoolIsPageLocked(pPage)
2031 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2032 break;
2033 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2034 pgmPoolCacheUsed(pPool, pPage);
2035 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2036 }
2037
2038 /*
2039 * Found a usable page, flush it and return.
2040 */
2041 int rc = pgmPoolFlushPage(pPool, pPage);
2042 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2043 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2044 if (rc == VINF_SUCCESS)
2045 PGM_INVL_ALL_VCPU_TLBS(pVM);
2046 return rc;
2047}
2048
2049
2050/**
2051 * Checks if a kind mismatch is really a page being reused
2052 * or if it's just a normal remapping.
2053 *
2054 * @returns true if reused and the cached page (enmKind1) should be flushed
2055 * @returns false if not reused.
2056 * @param enmKind1 The kind of the cached page.
2057 * @param enmKind2 The kind of the requested page.
2058 */
2059static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2060{
2061 switch (enmKind1)
2062 {
2063 /*
2064 * Never reuse them. There is no remapping in non-paging mode.
2065 */
2066 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2067 case PGMPOOLKIND_32BIT_PD_PHYS:
2068 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2069 case PGMPOOLKIND_PAE_PD_PHYS:
2070 case PGMPOOLKIND_PAE_PDPT_PHYS:
2071 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2072 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2073 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2074 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2075 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2076 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2077 return false;
2078
2079 /*
2080 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2081 */
2082 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2083 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2084 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2085 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2086 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2087 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2088 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2089 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2090 case PGMPOOLKIND_32BIT_PD:
2091 case PGMPOOLKIND_PAE_PDPT:
2092 switch (enmKind2)
2093 {
2094 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2095 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2096 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2097 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2098 case PGMPOOLKIND_64BIT_PML4:
2099 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2100 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2101 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2102 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2103 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2104 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2105 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2106 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2107 return true;
2108 default:
2109 return false;
2110 }
2111
2112 /*
2113 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2114 */
2115 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2116 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2117 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2118 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2119 case PGMPOOLKIND_64BIT_PML4:
2120 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2121 switch (enmKind2)
2122 {
2123 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2124 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2125 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2126 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2127 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2128 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2129 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2130 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2131 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2132 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2133 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2134 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2135 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2136 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2137 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2138 return true;
2139 default:
2140 return false;
2141 }
2142
2143 /*
2144 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2145 */
2146 case PGMPOOLKIND_ROOT_NESTED:
2147 return false;
2148
2149 default:
2150 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2151 }
2152}
2153
2154
2155/**
2156 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2157 *
2158 * @returns VBox status code.
2159 * @retval VINF_PGM_CACHED_PAGE on success.
2160 * @retval VERR_FILE_NOT_FOUND if not found.
2161 * @param pPool The pool.
2162 * @param GCPhys The GC physical address of the page we're gonna shadow.
2163 * @param enmKind The kind of mapping.
2164 * @param enmAccess Access type for the mapping (only relevant for big pages)
2165 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2166 * @param iUser The shadow page pool index of the user table. This is
2167 * NIL_PGMPOOL_IDX for root pages.
2168 * @param iUserTable The index into the user table (shadowed). Ignored if
2169 * root page
2170 * @param ppPage Where to store the pointer to the page.
2171 */
2172static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2173 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2174{
2175 /*
2176 * Look up the GCPhys in the hash.
2177 */
2178 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2179 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2180 if (i != NIL_PGMPOOL_IDX)
2181 {
2182 do
2183 {
2184 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2185 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2186 if (pPage->GCPhys == GCPhys)
2187 {
2188 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2189 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2190 && pPage->fA20Enabled == fA20Enabled)
2191 {
2192 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2193 * doesn't flush it in case there are no more free use records.
2194 */
2195 pgmPoolCacheUsed(pPool, pPage);
2196
2197 int rc = VINF_SUCCESS;
2198 if (iUser != NIL_PGMPOOL_IDX)
2199 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2200 if (RT_SUCCESS(rc))
2201 {
2202 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2203 *ppPage = pPage;
2204 if (pPage->cModifications)
2205 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2206 STAM_COUNTER_INC(&pPool->StatCacheHits);
2207 return VINF_PGM_CACHED_PAGE;
2208 }
2209 return rc;
2210 }
2211
2212 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2213 {
2214 /*
2215 * The kind is different. In some cases we should now flush the page
2216 * as it has been reused, but in most cases this is normal remapping
2217 * of PDs as PT or big pages using the GCPhys field in a slightly
2218 * different way than the other kinds.
2219 */
2220 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2221 {
2222 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2223 pgmPoolFlushPage(pPool, pPage);
2224 break;
2225 }
2226 }
2227 }
2228
2229 /* next */
2230 i = pPage->iNext;
2231 } while (i != NIL_PGMPOOL_IDX);
2232 }
2233
2234 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2235 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2236 return VERR_FILE_NOT_FOUND;
2237}
2238
2239
2240/**
2241 * Inserts a page into the cache.
2242 *
2243 * @param pPool The pool.
2244 * @param pPage The cached page.
2245 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2246 */
2247static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2248{
2249 /*
2250 * Insert into the GCPhys hash if the page is fit for that.
2251 */
2252 Assert(!pPage->fCached);
2253 if (fCanBeCached)
2254 {
2255 pPage->fCached = true;
2256 pgmPoolHashInsert(pPool, pPage);
2257 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2258 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2259 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2260 }
2261 else
2262 {
2263 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2264 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2265 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2266 }
2267
2268 /*
2269 * Insert at the head of the age list.
2270 */
2271 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2272 pPage->iAgeNext = pPool->iAgeHead;
2273 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2274 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2275 else
2276 pPool->iAgeTail = pPage->idx;
2277 pPool->iAgeHead = pPage->idx;
2278}
2279
2280
2281/**
2282 * Flushes a cached page.
2283 *
2284 * @param pPool The pool.
2285 * @param pPage The cached page.
2286 */
2287static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2288{
2289 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2290
2291 /*
2292 * Remove the page from the hash.
2293 */
2294 if (pPage->fCached)
2295 {
2296 pPage->fCached = false;
2297 pgmPoolHashRemove(pPool, pPage);
2298 }
2299 else
2300 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2301
2302 /*
2303 * Remove it from the age list.
2304 */
2305 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2306 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2307 else
2308 pPool->iAgeTail = pPage->iAgePrev;
2309 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2310 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2311 else
2312 pPool->iAgeHead = pPage->iAgeNext;
2313 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2314 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2315}
2316
2317
2318/**
2319 * Looks for pages sharing the monitor.
2320 *
2321 * @returns Pointer to the head page.
2322 * @returns NULL if not found.
2323 * @param pPool The pool.
2324 * @param pNewPage The page which is going to be monitored.
2325 */
2326static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2327{
2328 /*
2329 * Look up the GCPhys in the hash.
2330 */
2331 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2332 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2333 if (i == NIL_PGMPOOL_IDX)
2334 return NULL;
2335 do
2336 {
2337 PPGMPOOLPAGE pPage = &pPool->aPages[i];
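        /* Any pool page backed by the same guest page counts as a match; GCPhys may carry a sub-page offset for some kinds, hence the range check. */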
2338 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2339 && pPage != pNewPage)
2340 {
2341 switch (pPage->enmKind)
2342 {
2343 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2344 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2345 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2346 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2347 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2348 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2349 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2350 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2351 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2352 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2353 case PGMPOOLKIND_64BIT_PML4:
2354 case PGMPOOLKIND_32BIT_PD:
2355 case PGMPOOLKIND_PAE_PDPT:
2356 {
2357 /* find the head */
2358 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2359 {
2360 Assert(pPage->iMonitoredPrev != pPage->idx);
2361 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2362 }
2363 return pPage;
2364 }
2365
2366 /* ignore, no monitoring. */
2367 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2368 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2369 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2370 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2371 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2372 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2373 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2374 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2375 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2376 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2377 case PGMPOOLKIND_ROOT_NESTED:
2378 case PGMPOOLKIND_PAE_PD_PHYS:
2379 case PGMPOOLKIND_PAE_PDPT_PHYS:
2380 case PGMPOOLKIND_32BIT_PD_PHYS:
2381 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2382 break;
2383 default:
2384 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2385 }
2386 }
2387
2388 /* next */
2389 i = pPage->iNext;
2390 } while (i != NIL_PGMPOOL_IDX);
2391 return NULL;
2392}
2393
2394
2395/**
2396 * Enables write monitoring of a guest page.
2397 *
2398 * @returns VBox status code.
2399 * @retval VINF_SUCCESS on success.
2400 * @param pPool The pool.
2401 * @param pPage The cached page.
2402 */
2403static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2404{
2405 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2406
2407 /*
2408 * Filter out the relevant kinds.
2409 */
2410 switch (pPage->enmKind)
2411 {
2412 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2413 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2414 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2415 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2416 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2417 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2418 case PGMPOOLKIND_64BIT_PML4:
2419 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2420 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2421 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2422 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2423 case PGMPOOLKIND_32BIT_PD:
2424 case PGMPOOLKIND_PAE_PDPT:
2425 break;
2426
2427 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2428 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2429 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2430 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2431 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2432 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2433 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2434 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2435 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2436 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2437 case PGMPOOLKIND_ROOT_NESTED:
2438 /* Nothing to monitor here. */
2439 return VINF_SUCCESS;
2440
2441 case PGMPOOLKIND_32BIT_PD_PHYS:
2442 case PGMPOOLKIND_PAE_PDPT_PHYS:
2443 case PGMPOOLKIND_PAE_PD_PHYS:
2444 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2445 /* Nothing to monitor here. */
2446 return VINF_SUCCESS;
2447 default:
2448 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2449 }
2450
2451 /*
2452 * Install handler.
2453 */
2454 int rc;
2455 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2456 if (pPageHead)
2457 {
2458 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2459 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2460
2461#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2462 if (pPageHead->fDirty)
2463 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2464#endif
2465
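        /* Share the existing handler: link this page into the monitor chain right after the head. */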
2466 pPage->iMonitoredPrev = pPageHead->idx;
2467 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2468 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2469 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2470 pPageHead->iMonitoredNext = pPage->idx;
2471 rc = VINF_SUCCESS;
2472 }
2473 else
2474 {
2475 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2476 PVMCC pVM = pPool->CTX_SUFF(pVM);
2477 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2478 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2479 pgmPoolConvertPageToR3(pPool, pPage), pgmPoolConvertPageToR0(pPool, pPage),
2480 NIL_RTRCPTR, NIL_RTR3PTR /*pszDesc*/);
2481 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2482 * the heap size should suffice. */
2483 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2484 PVMCPU pVCpu = VMMGetCpu(pVM);
2485 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2486 }
2487 pPage->fMonitored = true;
2488 return rc;
2489}
2490
2491
2492/**
2493 * Disables write monitoring of a guest page.
2494 *
2495 * @returns VBox status code.
2496 * @retval VINF_SUCCESS on success.
2497 * @param pPool The pool.
2498 * @param pPage The cached page.
2499 */
2500static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2501{
2502 /*
2503 * Filter out the relevant kinds.
2504 */
2505 switch (pPage->enmKind)
2506 {
2507 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2508 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2509 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2510 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2511 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2512 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2513 case PGMPOOLKIND_64BIT_PML4:
2514 case PGMPOOLKIND_32BIT_PD:
2515 case PGMPOOLKIND_PAE_PDPT:
2516 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2517 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2518 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2519 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2520 break;
2521
2522 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2523 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2524 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2525 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2526 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2527 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2528 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2529 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2530 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2531 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2532 case PGMPOOLKIND_ROOT_NESTED:
2533 case PGMPOOLKIND_PAE_PD_PHYS:
2534 case PGMPOOLKIND_PAE_PDPT_PHYS:
2535 case PGMPOOLKIND_32BIT_PD_PHYS:
2536 /* Nothing to monitor here. */
2537 Assert(!pPage->fMonitored);
2538 return VINF_SUCCESS;
2539
2540 default:
2541 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2542 }
2543 Assert(pPage->fMonitored);
2544
2545 /*
2546 * Remove the page from the monitored list or uninstall it if last.
2547 */
2548 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2549 int rc;
2550 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2551 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2552 {
2553 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2554 {
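            /* This page was the chain head; promote the next page and re-point the physical handler's user arguments at it. */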
2555 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2556 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2557 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2558 pgmPoolConvertPageToR3(pPool, pNewHead),
2559 pgmPoolConvertPageToR0(pPool, pNewHead));
2560
2561 AssertFatalRCSuccess(rc);
2562 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2563 }
2564 else
2565 {
2566 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2567 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2568 {
2569 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2570 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2571 }
2572 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2573 rc = VINF_SUCCESS;
2574 }
2575 }
2576 else
2577 {
2578 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2579 AssertFatalRC(rc);
2580 PVMCPU pVCpu = VMMGetCpu(pVM);
2581 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2582 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2583 }
2584 pPage->fMonitored = false;
2585
2586 /*
2587 * Remove it from the list of modified pages (if in it).
2588 */
2589 pgmPoolMonitorModifiedRemove(pPool, pPage);
2590
2591 return rc;
2592}
2593
2594
2595/**
2596 * Inserts the page into the list of modified pages.
2597 *
2598 * @param pPool The pool.
2599 * @param pPage The page.
2600 */
2601void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2602{
2603 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2604 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2605 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2606 && pPool->iModifiedHead != pPage->idx,
2607 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2608 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2609 pPool->iModifiedHead, pPool->cModifiedPages));
2610
2611 pPage->iModifiedNext = pPool->iModifiedHead;
2612 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2613 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2614 pPool->iModifiedHead = pPage->idx;
2615 pPool->cModifiedPages++;
2616#ifdef VBOX_WITH_STATISTICS
2617 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2618 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2619#endif
2620}
2621
2622
2623/**
2624 * Removes the page from the list of modified pages and resets the
2625 * modification counter.
2626 *
2627 * @param pPool The pool.
2628 * @param pPage The page which is believed to be in the list of modified pages.
2629 */
2630static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2631{
2632 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2633 if (pPool->iModifiedHead == pPage->idx)
2634 {
2635 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2636 pPool->iModifiedHead = pPage->iModifiedNext;
2637 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2638 {
2639 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2640 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2641 }
2642 pPool->cModifiedPages--;
2643 }
2644 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2645 {
2646 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2647 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2648 {
2649 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2650 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2651 }
2652 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2653 pPool->cModifiedPages--;
2654 }
2655 else
2656 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2657 pPage->cModifications = 0;
2658}
2659
2660
2661/**
2662 * Zaps the list of modified pages, resetting their modification counters in the process.
2663 *
2664 * @param pVM The cross context VM structure.
2665 */
2666static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2667{
2668 PGM_LOCK_VOID(pVM);
2669 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2670 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2671
2672 unsigned cPages = 0; NOREF(cPages);
2673
2674#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2675 pgmPoolResetDirtyPages(pVM);
2676#endif
2677
2678 uint16_t idx = pPool->iModifiedHead;
2679 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2680 while (idx != NIL_PGMPOOL_IDX)
2681 {
2682 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2683 idx = pPage->iModifiedNext;
2684 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2685 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2686 pPage->cModifications = 0;
2687 Assert(++cPages);
2688 }
2689 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2690 pPool->cModifiedPages = 0;
2691 PGM_UNLOCK(pVM);
2692}
2693
2694
2695/**
2696 * Handle SyncCR3 pool tasks
2697 *
2698 * @returns VBox status code.
2699 * @retval VINF_SUCCESS on success.
2700 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2701 * @param pVCpu The cross context virtual CPU structure.
2702 * @remark Should only be used when monitoring is available, thus placed in
2703 * the PGMPOOL_WITH_MONITORING \#ifdef.
2704 */
2705int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2706{
2707 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2708 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2709
2710 /*
2711 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2712 * Occasionally we will have to clear all the shadow page tables because we wanted
2713 * to monitor a page which was mapped by too many shadowed page tables. This operation
2714 * is sometimes referred to as a 'lightweight flush'.
2715 */
2716# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2717 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2718 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2719# else /* !IN_RING3 */
2720 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2721 {
2722 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2723 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2724
2725 /* Make sure all other VCPUs return to ring 3. */
2726 if (pVM->cCpus > 1)
2727 {
2728 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2729 PGM_INVL_ALL_VCPU_TLBS(pVM);
2730 }
2731 return VINF_PGM_SYNC_CR3;
2732 }
2733# endif /* !IN_RING3 */
2734 else
2735 {
2736 pgmPoolMonitorModifiedClearAll(pVM);
2737
2738 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2739 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2740 {
2741 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2742 return pgmPoolSyncCR3(pVCpu);
2743 }
2744 }
2745 return VINF_SUCCESS;
2746}
2747
2748
2749/**
2750 * Frees up at least one user entry.
2751 *
2752 * @returns VBox status code.
2753 * @retval VINF_SUCCESS if at least one user entry was freed.
2754 *
2755 * @param pPool The pool.
2756 * @param iUser The user index.
2757 */
2758static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2759{
2760 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2761 /*
2762 * Just free cached pages in a braindead fashion.
2763 */
2764 /** @todo walk the age list backwards and free the first with usage. */
2765 int rc = VINF_SUCCESS;
2766 do
2767 {
2768 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2769 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2770 rc = rc2;
2771 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2772 return rc;
2773}
2774
2775
2776/**
2777 * Inserts a page into the cache.
2778 *
2779 * This will create a user node for the page, insert it into the GCPhys
2780 * hash, and insert it into the age list.
2781 *
2782 * @returns VBox status code.
2783 * @retval VINF_SUCCESS if successfully added.
2784 *
2785 * @param pPool The pool.
2786 * @param pPage The cached page.
2787 * @param GCPhys The GC physical address of the page we're gonna shadow.
2788 * @param iUser The user index.
2789 * @param iUserTable The user table index.
2790 */
2791DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2792{
2793 int rc = VINF_SUCCESS;
2794 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2795
2796 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2797
2798 if (iUser != NIL_PGMPOOL_IDX)
2799 {
2800#ifdef VBOX_STRICT
2801 /*
2802 * Check that the entry doesn't already exist.
2803 */
2804 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2805 {
2806 uint16_t i = pPage->iUserHead;
2807 do
2808 {
2809 Assert(i < pPool->cMaxUsers);
2810 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2811 i = paUsers[i].iNext;
2812 } while (i != NIL_PGMPOOL_USER_INDEX);
2813 }
2814#endif
2815
2816 /*
2817 * Find a free user node.
2818 */
2819 uint16_t i = pPool->iUserFreeHead;
2820 if (i == NIL_PGMPOOL_USER_INDEX)
2821 {
2822 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2823 if (RT_FAILURE(rc))
2824 return rc;
2825 i = pPool->iUserFreeHead;
2826 }
2827
2828 /*
2829 * Unlink the user node from the free list,
2830 * initialize and insert it into the user list.
2831 */
2832 pPool->iUserFreeHead = paUsers[i].iNext;
2833 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2834 paUsers[i].iUser = iUser;
2835 paUsers[i].iUserTable = iUserTable;
2836 pPage->iUserHead = i;
2837 }
2838 else
2839 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2840
2841
2842 /*
2843 * Insert into cache and enable monitoring of the guest page if enabled.
2844 *
2845 * Until we implement caching of all levels, including the CR3 one, we'll
2846 * have to make sure we don't try to monitor & cache any recursive reuse of
2847 * a monitored CR3 page. Because all Windows versions are doing this we'll
2848 * have to be able to do combined access monitoring, CR3 + PT and
2849 * PD + PT (guest PAE).
2850 *
2851 * Update:
2852 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2853 */
2854 const bool fCanBeMonitored = true;
2855 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2856 if (fCanBeMonitored)
2857 {
2858 rc = pgmPoolMonitorInsert(pPool, pPage);
2859 AssertRC(rc);
2860 }
2861 return rc;
2862}
2863
2864
2865/**
2866 * Adds a user reference to a page.
2867 *
2868 * This will move the page to the head of the cache age list.
2869 *
2870 * @returns VBox status code.
2871 * @retval VINF_SUCCESS if successfully added.
2872 *
2873 * @param pPool The pool.
2874 * @param pPage The cached page.
2875 * @param iUser The user index.
2876 * @param iUserTable The user table.
2877 */
2878static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2879{
2880 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2881 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2882 Assert(iUser != NIL_PGMPOOL_IDX);
2883
2884# ifdef VBOX_STRICT
2885 /*
2886 * Check that the entry doesn't already exist. We only allow multiple
2887 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2888 */
2889 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2890 {
2891 uint16_t i = pPage->iUserHead;
2892 do
2893 {
2894 Assert(i < pPool->cMaxUsers);
2895 /** @todo this assertion looks odd... Shouldn't it be && here? */
2896 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2897 i = paUsers[i].iNext;
2898 } while (i != NIL_PGMPOOL_USER_INDEX);
2899 }
2900# endif
2901
2902 /*
2903 * Allocate a user node.
2904 */
2905 uint16_t i = pPool->iUserFreeHead;
2906 if (i == NIL_PGMPOOL_USER_INDEX)
2907 {
2908 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2909 if (RT_FAILURE(rc))
2910 return rc;
2911 i = pPool->iUserFreeHead;
2912 }
2913 pPool->iUserFreeHead = paUsers[i].iNext;
2914
2915 /*
2916 * Initialize the user node and insert it.
2917 */
2918 paUsers[i].iNext = pPage->iUserHead;
2919 paUsers[i].iUser = iUser;
2920 paUsers[i].iUserTable = iUserTable;
2921 pPage->iUserHead = i;
2922
2923# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2924 if (pPage->fDirty)
2925 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2926# endif
2927
2928 /*
2929 * Tell the cache to update its replacement stats for this page.
2930 */
2931 pgmPoolCacheUsed(pPool, pPage);
2932 return VINF_SUCCESS;
2933}
2934
2935
2936/**
2937 * Frees a user record associated with a page.
2938 *
2939 * This does not clear the entry in the user table, it simply returns the
2940 * user record to the chain of free records.
2941 *
2942 * @param pPool The pool.
2943 * @param pPage The shadow page.
2944 * @param iUser The shadow page pool index of the user table.
2945 * @param iUserTable The index into the user table (shadowed).
2946 *
2947 * @remarks Don't call this for root pages.
2948 */
2949static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2950{
2951 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2952 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2953 Assert(iUser != NIL_PGMPOOL_IDX);
2954
2955 /*
2956 * Unlink and free the specified user entry.
2957 */
2958
2959 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2960 uint16_t i = pPage->iUserHead;
2961 if ( i != NIL_PGMPOOL_USER_INDEX
2962 && paUsers[i].iUser == iUser
2963 && paUsers[i].iUserTable == iUserTable)
2964 {
2965 pPage->iUserHead = paUsers[i].iNext;
2966
2967 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2968 paUsers[i].iNext = pPool->iUserFreeHead;
2969 pPool->iUserFreeHead = i;
2970 return;
2971 }
2972
2973 /* General: Linear search. */
2974 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2975 while (i != NIL_PGMPOOL_USER_INDEX)
2976 {
2977 if ( paUsers[i].iUser == iUser
2978 && paUsers[i].iUserTable == iUserTable)
2979 {
2980 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2981 paUsers[iPrev].iNext = paUsers[i].iNext;
2982 else
2983 pPage->iUserHead = paUsers[i].iNext;
2984
2985 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2986 paUsers[i].iNext = pPool->iUserFreeHead;
2987 pPool->iUserFreeHead = i;
2988 return;
2989 }
2990 iPrev = i;
2991 i = paUsers[i].iNext;
2992 }
2993
2994 /* Fatal: didn't find it */
2995 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
2996 iUser, iUserTable, pPage->GCPhys));
2997}
2998
2999
3000#if 0 /* unused */
3001/**
3002 * Gets the entry size of a shadow table.
3003 *
3004 * @param enmKind The kind of page.
3005 *
3006 * @returns The size of the entry in bytes. That is, 4 or 8.
3007 * @returns If the kind is not for a table, an assertion is raised and 0 is
3008 * returned.
3009 */
3010DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3011{
3012 switch (enmKind)
3013 {
3014 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3015 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3016 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3017 case PGMPOOLKIND_32BIT_PD:
3018 case PGMPOOLKIND_32BIT_PD_PHYS:
3019 return 4;
3020
3021 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3022 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3023 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3024 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3025 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3026 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3027 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3028 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3029 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3030 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3031 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3032 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3033 case PGMPOOLKIND_64BIT_PML4:
3034 case PGMPOOLKIND_PAE_PDPT:
3035 case PGMPOOLKIND_ROOT_NESTED:
3036 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3037 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3038 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3039 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3040 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3041 case PGMPOOLKIND_PAE_PD_PHYS:
3042 case PGMPOOLKIND_PAE_PDPT_PHYS:
3043 return 8;
3044
3045 default:
3046 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3047 }
3048}
3049#endif /* unused */
3050
3051#if 0 /* unused */
3052/**
3053 * Gets the entry size of a guest table.
3054 *
3055 * @param enmKind The kind of page.
3056 *
3057 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3058 * @returns If the kind is not for a table, an assertion is raised and 0 is
3059 * returned.
3060 */
3061DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3062{
3063 switch (enmKind)
3064 {
3065 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3066 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3067 case PGMPOOLKIND_32BIT_PD:
3068 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3069 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3070 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3071 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3072 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3073 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3074 return 4;
3075
3076 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3077 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3078 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3079 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3080 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3081 case PGMPOOLKIND_64BIT_PML4:
3082 case PGMPOOLKIND_PAE_PDPT:
3083 return 8;
3084
3085 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3086 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3087 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3088 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3089 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3090 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3091 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3092 case PGMPOOLKIND_ROOT_NESTED:
3093 case PGMPOOLKIND_PAE_PD_PHYS:
3094 case PGMPOOLKIND_PAE_PDPT_PHYS:
3095 case PGMPOOLKIND_32BIT_PD_PHYS:
3096 /** @todo can we return 0? (nobody is calling this...) */
3097 AssertFailed();
3098 return 0;
3099
3100 default:
3101 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3102 }
3103}
3104#endif /* unused */
3105
3106
3107/**
3108 * Checks one shadow page table entry for a mapping of a physical page.
3109 *
3110 * @returns true / false indicating removal of all relevant PTEs
3111 *
3112 * @param pVM The cross context VM structure.
3113 * @param pPhysPage The guest page in question.
3114 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3115 * @param iShw The index of the shadow page table.
3116 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3117 */
3118static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3119{
3120 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3121 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3122 bool fRet = false;
3123
3124 /*
3125 * Assert sanity.
3126 */
3127 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3128 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3129 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3130
3131 /*
3132 * Then, clear the actual mappings to the page in the shadow PT.
3133 */
3134 switch (pPage->enmKind)
3135 {
3136 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3137 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3138 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3139 {
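            /* The value (page frame + present bit) a shadow PTE must contain to map pPhysPage. */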
3140 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3141 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3142 uint32_t u32AndMask = 0;
3143 uint32_t u32OrMask = 0;
3144
3145 if (!fFlushPTEs)
3146 {
3147 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3148 {
3149 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3150 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3151 u32OrMask = X86_PTE_RW;
3152 u32AndMask = UINT32_MAX;
3153 fRet = true;
3154 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3155 break;
3156
3157 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3158 u32OrMask = 0;
3159 u32AndMask = ~X86_PTE_RW;
3160 fRet = true;
3161 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3162 break;
3163 default:
3164 /* (shouldn't be here, will assert below) */
3165 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3166 break;
3167 }
3168 }
3169 else
3170 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3171
3172 /* Update the counter if we're removing references. */
3173 if (!u32AndMask)
3174 {
3175 Assert(pPage->cPresent);
3176 Assert(pPool->cPresent);
3177 pPage->cPresent--;
3178 pPool->cPresent--;
3179 }
3180
3181 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3182 {
3183 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3184 X86PTE Pte;
3185 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3186 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3187 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3188 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3189 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3190 return fRet;
3191 }
3192#ifdef LOG_ENABLED
3193 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3194 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3195 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3196 {
3197 Log(("i=%d cFound=%d\n", i, ++cFound));
3198 }
3199#endif
3200 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3201 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3202 break;
3203 }
3204
3205 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3206 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3207 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3208 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3209 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3210 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3211 {
3212 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3213 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3214 uint64_t u64OrMask = 0;
3215 uint64_t u64AndMask = 0;
3216
3217 if (!fFlushPTEs)
3218 {
3219 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3220 {
3221 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3222 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3223 u64OrMask = X86_PTE_RW;
3224 u64AndMask = UINT64_MAX;
3225 fRet = true;
3226 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3227 break;
3228
3229 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3230 u64OrMask = 0;
3231 u64AndMask = ~(uint64_t)X86_PTE_RW;
3232 fRet = true;
3233 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3234 break;
3235
3236 default:
3237 /* (shouldn't be here, will assert below) */
3238 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3239 break;
3240 }
3241 }
3242 else
3243 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3244
3245 /* Update the counter if we're removing references. */
3246 if (!u64AndMask)
3247 {
3248 Assert(pPage->cPresent);
3249 Assert(pPool->cPresent);
3250 pPage->cPresent--;
3251 pPool->cPresent--;
3252 }
3253
3254 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3255 {
3256 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3257 X86PTEPAE Pte;
3258 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3259 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3260 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3261
3262 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3263 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3264 return fRet;
3265 }
3266#ifdef LOG_ENABLED
3267 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3268 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3269 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3270 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3271 Log(("i=%d cFound=%d\n", i, ++cFound));
3272#endif
3273 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3274 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3275 break;
3276 }
3277
3278#ifdef PGM_WITH_LARGE_PAGES
3279 /* Large page case only. */
3280 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3281 {
3282 Assert(pVM->pgm.s.fNestedPaging);
3283
3284 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3285 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
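            /* A 2 MB large page is tracked via a single EPT PDE, so clearing that one
               entry below drops the entire mapping of the large page. */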
3286
3287 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3288 {
3289 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3290 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3291 pPD->a[iPte].u = 0;
3292 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3293
3294 /* Update the counter as we're removing references. */
3295 Assert(pPage->cPresent);
3296 Assert(pPool->cPresent);
3297 pPage->cPresent--;
3298 pPool->cPresent--;
3299
3300 return fRet;
3301 }
3302# ifdef LOG_ENABLED
3303 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3304 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3305 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3306 Log(("i=%d cFound=%d\n", i, ++cFound));
3307# endif
3308 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3309 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3310 break;
3311 }
3312
3313 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3314 case PGMPOOLKIND_PAE_PD_PHYS:
3315 {
3316 Assert(pVM->pgm.s.fNestedPaging);
3317
3318 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3319 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3320
3321 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3322 {
3323 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3324 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3325 pPD->a[iPte].u = 0;
3326 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3327
3328 /* Update the counter as we're removing references. */
3329 Assert(pPage->cPresent);
3330 Assert(pPool->cPresent);
3331 pPage->cPresent--;
3332 pPool->cPresent--;
3333 return fRet;
3334 }
3335# ifdef LOG_ENABLED
3336 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3337 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3338 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3339 Log(("i=%d cFound=%d\n", i, ++cFound));
3340# endif
3341 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3342 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3343 break;
3344 }
3345#endif /* PGM_WITH_LARGE_PAGES */
3346
3347 default:
3348 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3349 }
3350
3351 /* not reached. */
3352#ifndef _MSC_VER
3353 return fRet;
3354#endif
3355}
3356
3357
3358/**
3359 * Scans one shadow page table for mappings of a physical page.
3360 *
3361 * @param pVM The cross context VM structure.
3362 * @param pPhysPage The guest page in question.
3363 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3364 * @param iShw The shadow page table.
3365 */
3366static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3367{
3368 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3369
3370 /* We should only come here when there's only one reference to this physical page. */
3371 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3372
3373 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3374 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3375 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3376 if (!fKeptPTEs)
3377 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3378 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3379}
3380
3381
3382/**
3383 * Flushes a list of shadow page tables mapping the same physical page.
3384 *
3385 * @param pVM The cross context VM structure.
3386 * @param pPhysPage The guest page in question.
3387 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3388 * @param iPhysExt The physical cross reference extent list to flush.
3389 */
3390static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3391{
3392 PGM_LOCK_ASSERT_OWNER(pVM);
3393 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3394 bool fKeepList = false;
3395
3396 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3397 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3398
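    /* Walk the physical extent list; each extent holds up to three (shadow page
       index, PTE index) pairs referencing this physical page. */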
3399 const uint16_t iPhysExtStart = iPhysExt;
3400 PPGMPOOLPHYSEXT pPhysExt;
3401 do
3402 {
3403 Assert(iPhysExt < pPool->cMaxPhysExts);
3404 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3405 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3406 {
3407 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3408 {
3409 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3410 if (!fKeptPTEs)
3411 {
3412 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3413 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3414 }
3415 else
3416 fKeepList = true;
3417 }
3418 }
3419 /* next */
3420 iPhysExt = pPhysExt->iNext;
3421 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3422
3423 if (!fKeepList)
3424 {
3425 /* insert the list into the free list and clear the ram range entry. */
3426 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3427 pPool->iPhysExtFreeHead = iPhysExtStart;
3428 /* Invalidate the tracking data. */
3429 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3430 }
3431
3432 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3433}
3434
3435
3436/**
3437 * Flushes all shadow page table mappings of the given guest page.
3438 *
3439 * This is typically called when the host page backing the guest one has been
3440 * replaced or when the page protection was changed due to a guest access
3441 * caught by the monitoring.
3442 *
3443 * @returns VBox status code.
3444 * @retval VINF_SUCCESS if all references have been successfully cleared.
3445 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3446 * pool cleaning. FF and sync flags are set.
3447 *
3448 * @param pVM The cross context VM structure.
3449 * @param GCPhysPage GC physical address of the page in question
3450 * @param pPhysPage The guest page in question.
3451 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3452 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3453 *                      flushed; it is NOT touched if this isn't necessary.
3454 *                      The caller MUST initialize this to @a false.
3455 */
3456int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3457{
3458 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3459 PGM_LOCK_VOID(pVM);
3460 int rc = VINF_SUCCESS;
3461
3462#ifdef PGM_WITH_LARGE_PAGES
3463 /* Is this page part of a large page? */
3464 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3465 {
3466 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3467 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3468
3469 /* Fetch the large page base. */
3470 PPGMPAGE pLargePage;
3471 if (GCPhysBase != GCPhysPage)
3472 {
3473 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3474 AssertFatal(pLargePage);
3475 }
3476 else
3477 pLargePage = pPhysPage;
3478
3479 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3480
3481 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3482 {
3483 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3484 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3485 pVM->pgm.s.cLargePagesDisabled++;
3486
3487 /* Update the base as *only* that one has a reference and there's only one PDE to clear. */
3488 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3489
3490 *pfFlushTLBs = true;
3491 PGM_UNLOCK(pVM);
3492 return rc;
3493 }
3494 }
3495#else
3496 NOREF(GCPhysPage);
3497#endif /* PGM_WITH_LARGE_PAGES */
3498
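    /* The 16-bit tracking word either names a single shadow page table (cRefs == 1),
       the head of a physical extent list (cRefs == PGMPOOL_TD_CREFS_PHYSEXT), or the
       overflowed marker when there are too many references to track individually. */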
3499 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3500 if (u16)
3501 {
3502 /*
3503 * The zero page is currently screwing up the tracking and we'll
3504 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3505 * is defined, zero pages won't normally be mapped. Some kind of solution
3506 * will be needed for this problem of course, but it will have to wait...
3507 */
3508 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3509 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3510 rc = VINF_PGM_GCPHYS_ALIASED;
3511 else
3512 {
3513 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3514 {
3515 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3516 pgmPoolTrackFlushGCPhysPT(pVM,
3517 pPhysPage,
3518 fFlushPTEs,
3519 PGMPOOL_TD_GET_IDX(u16));
3520 }
3521 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3522 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3523 else
3524 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3525 *pfFlushTLBs = true;
3526 }
3527 }
3528
3529 if (rc == VINF_PGM_GCPHYS_ALIASED)
3530 {
3531 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3532 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3533 rc = VINF_PGM_SYNC_CR3;
3534 }
3535 PGM_UNLOCK(pVM);
3536 return rc;
3537}
3538
3539
3540/**
3541 * Scans all shadow page tables for mappings of a physical page.
3542 *
3543 * This may be slow, but it's most likely more efficient than cleaning
3544 * out the entire page pool / cache.
3545 *
3546 * @returns VBox status code.
3547 * @retval VINF_SUCCESS if all references have been successfully cleared.
3548 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3549 * a page pool cleaning.
3550 *
3551 * @param pVM The cross context VM structure.
3552 * @param pPhysPage The guest page in question.
3553 */
3554int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3555{
3556 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3557 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3558 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3559 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3560
3561 /*
3562 * There is a limit to what makes sense.
3563 */
3564 if ( pPool->cPresent > 1024
3565 && pVM->cCpus == 1)
3566 {
3567 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3568 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3569 return VINF_PGM_GCPHYS_ALIASED;
3570 }
3571
3572 /*
3573 * Iterate all the pages until we've encountered all that are in use.
3574 * This is a simple but not quite optimal solution.
3575 */
3576 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3577 unsigned cLeft = pPool->cUsedPages;
3578 unsigned iPage = pPool->cCurPages;
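    /* Walk the pool backwards; cLeft lets us stop once every in-use page has been
       visited, and the per-page cPresent copy bounds the scan of each page table. */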
3579 while (--iPage >= PGMPOOL_IDX_FIRST)
3580 {
3581 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3582 if ( pPage->GCPhys != NIL_RTGCPHYS
3583 && pPage->cPresent)
3584 {
3585 switch (pPage->enmKind)
3586 {
3587 /*
3588 * We only care about shadow page tables.
3589 */
3590 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3591 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3592 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3593 {
3594 const uint32_t u32 = (uint32_t)u64;
3595 unsigned cPresent = pPage->cPresent;
3596 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3597 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3598 {
3599 const X86PGUINT uPte = pPT->a[i].u;
3600 if (uPte & X86_PTE_P)
3601 {
3602 if ((uPte & X86_PTE_PG_MASK) == u32)
3603 {
3604 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3605 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3606
3607 /* Update the counter as we're removing references. */
3608 Assert(pPage->cPresent);
3609 Assert(pPool->cPresent);
3610 pPage->cPresent--;
3611 pPool->cPresent--;
3612 }
3613 if (!--cPresent)
3614 break;
3615 }
3616 }
3617 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3618 break;
3619 }
3620
3621 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3622 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3623 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3624 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3625 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3626 {
3627 unsigned cPresent = pPage->cPresent;
3628 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3629 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3630 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3631 {
3632 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3633 {
3634 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3635 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0); /// @todo why not atomic?
3636
3637 /* Update the counter as we're removing references. */
3638 Assert(pPage->cPresent);
3639 Assert(pPool->cPresent);
3640 pPage->cPresent--;
3641 pPool->cPresent--;
3642 }
3643 if (!--cPresent)
3644 break;
3645 }
3646 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3647 break;
3648 }
3649
3650 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3651 {
3652 unsigned cPresent = pPage->cPresent;
3653 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3654 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3655 {
3656 X86PGPAEUINT const uPte = pPT->a[i].u;
3657 if (uPte & EPT_E_READ)
3658 {
3659 if ((uPte & EPT_PTE_PG_MASK) == u64)
3660 {
3661 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3662 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3663
3664 /* Update the counter as we're removing references. */
3665 Assert(pPage->cPresent);
3666 Assert(pPool->cPresent);
3667 pPage->cPresent--;
3668 pPool->cPresent--;
3669 }
3670 if (!--cPresent)
3671 break;
3672 }
3673 }
3674 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3675 break;
3676 }
3677 }
3678
3679 if (!--cLeft)
3680 break;
3681 }
3682 }
3683
3684 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3685 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3686
3687 /*
3688 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3689 */
3690 if (pPool->cPresent > 1024)
3691 {
3692 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3693 return VINF_PGM_GCPHYS_ALIASED;
3694 }
3695
3696 return VINF_SUCCESS;
3697}
3698
3699
3700/**
3701 * Clears the user entry in a user table.
3702 *
3703 * This is used to remove all references to a page when flushing it.
3704 */
3705static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3706{
3707 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3708 Assert(pUser->iUser < pPool->cCurPages);
3709 uint32_t iUserTable = pUser->iUserTable;
3710
3711 /*
3712 * Map the user page. Ignore references made by fictitious pages.
3713 */
3714 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3715 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3716 union
3717 {
3718 uint64_t *pau64;
3719 uint32_t *pau32;
3720 } u;
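    /* The owner's table is accessed as either 32-bit or 64-bit entries depending on
       its page kind, see the switch at the bottom. */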
3721 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3722 {
3723 Assert(!pUserPage->pvPageR3);
3724 return;
3725 }
3726 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3727
3728
3729 /* Safety precaution in case we change the paging for other modes too in the future. */
3730 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3731
3732#ifdef VBOX_STRICT
3733 /*
3734 * Some sanity checks.
3735 */
3736 switch (pUserPage->enmKind)
3737 {
3738 case PGMPOOLKIND_32BIT_PD:
3739 case PGMPOOLKIND_32BIT_PD_PHYS:
3740 Assert(iUserTable < X86_PG_ENTRIES);
3741 break;
3742 case PGMPOOLKIND_PAE_PDPT:
3743 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3744 case PGMPOOLKIND_PAE_PDPT_PHYS:
3745 Assert(iUserTable < 4);
3746 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3747 break;
3748 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3749 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3750 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3751 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3752 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3753 case PGMPOOLKIND_PAE_PD_PHYS:
3754 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3755 break;
3756 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3757 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3758 break;
3759 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3760 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3761 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3762 break;
3763 case PGMPOOLKIND_64BIT_PML4:
3764 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3765 /* GCPhys >> PAGE_SHIFT is the index here */
3766 break;
3767 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3768 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3769 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3770 break;
3771
3772 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3773 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3774 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3775 break;
3776
3777 case PGMPOOLKIND_ROOT_NESTED:
3778 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3779 break;
3780
3781 default:
3782 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3783 break;
3784 }
3785#endif /* VBOX_STRICT */
3786
3787 /*
3788 * Clear the entry in the user page.
3789 */
3790 switch (pUserPage->enmKind)
3791 {
3792 /* 32-bit entries */
3793 case PGMPOOLKIND_32BIT_PD:
3794 case PGMPOOLKIND_32BIT_PD_PHYS:
3795 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3796 break;
3797
3798 /* 64-bit entries */
3799 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3800 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3801 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3802 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3803 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3804 case PGMPOOLKIND_PAE_PD_PHYS:
3805 case PGMPOOLKIND_PAE_PDPT_PHYS:
3806 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3807 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3808 case PGMPOOLKIND_64BIT_PML4:
3809 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3810 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3811 case PGMPOOLKIND_PAE_PDPT:
3812 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3813 case PGMPOOLKIND_ROOT_NESTED:
3814 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3815 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3816 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3817 break;
3818
3819 default:
3820 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3821 }
3822 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3823}
3824
3825
3826/**
3827 * Clears all users of a page.
3828 */
3829static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3830{
3831 /*
3832 * Free all the user records.
3833 */
3834 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3835
3836 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3837 uint16_t i = pPage->iUserHead;
3838 while (i != NIL_PGMPOOL_USER_INDEX)
3839 {
3840 /* Clear the entry in the user table. */
3841 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3842
3843 /* Free it. */
3844 const uint16_t iNext = paUsers[i].iNext;
3845 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3846 paUsers[i].iNext = pPool->iUserFreeHead;
3847 pPool->iUserFreeHead = i;
3848
3849 /* Next. */
3850 i = iNext;
3851 }
3852 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3853}
3854
3855
3856/**
3857 * Allocates a new physical cross reference extent.
3858 *
3859 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3860 * @param pVM The cross context VM structure.
3861 * @param piPhysExt Where to store the phys ext index.
3862 */
3863PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
3864{
3865 PGM_LOCK_ASSERT_OWNER(pVM);
3866 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3867 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3868 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3869 {
3870 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3871 return NULL;
3872 }
3873 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3874 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3875 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3876 *piPhysExt = iPhysExt;
3877 return pPhysExt;
3878}
3879
3880
3881/**
3882 * Frees a physical cross reference extent.
3883 *
3884 * @param pVM The cross context VM structure.
3885 * @param iPhysExt The extent to free.
3886 */
3887void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
3888{
3889 PGM_LOCK_ASSERT_OWNER(pVM);
3890 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3891 Assert(iPhysExt < pPool->cMaxPhysExts);
3892 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3893 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3894 {
3895 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3896 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3897 }
3898 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3899 pPool->iPhysExtFreeHead = iPhysExt;
3900}
3901
3902
3903/**
3904 * Frees a whole list of physical cross reference extents.
3905 *
3906 * @param pVM The cross context VM structure.
3907 * @param iPhysExt The index of the first extent in the list to free.
3908 */
3909void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
3910{
3911 PGM_LOCK_ASSERT_OWNER(pVM);
3912 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3913
3914 const uint16_t iPhysExtStart = iPhysExt;
3915 PPGMPOOLPHYSEXT pPhysExt;
3916 do
3917 {
3918 Assert(iPhysExt < pPool->cMaxPhysExts);
3919 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3920 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3921 {
3922 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3923 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3924 }
3925
3926 /* next */
3927 iPhysExt = pPhysExt->iNext;
3928 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3929
3930 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3931 pPool->iPhysExtFreeHead = iPhysExtStart;
3932}
3933
3934
3935/**
3936 * Insert a reference into a list of physical cross reference extents.
3937 *
3938 * @returns The new tracking data for PGMPAGE.
3939 *
3940 * @param pVM The cross context VM structure.
3941 * @param iPhysExt The physical extent index of the list head.
3942 * @param iShwPT The shadow page table index.
3943 * @param iPte Page table entry
3944 *
3945 */
3946static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3947{
3948 PGM_LOCK_ASSERT_OWNER(pVM);
3949 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3950 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3951
3952 /*
3953 * Special common cases.
3954 */
3955 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3956 {
3957 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3958 paPhysExts[iPhysExt].apte[1] = iPte;
3959 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3960 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3961 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3962 }
3963 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3964 {
3965 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3966 paPhysExts[iPhysExt].apte[2] = iPte;
3967 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3968 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3969 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3970 }
3971 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3972
3973 /*
3974 * General treatment.
3975 */
3976 const uint16_t iPhysExtStart = iPhysExt;
3977 unsigned cMax = 15;
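    /* Bound the walk: after 15 extents (3 slots each, i.e. at most 45 references) we
       give up, free the list and mark the page as overflowed so later flushes take
       the slow path instead. */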
3978 for (;;)
3979 {
3980 Assert(iPhysExt < pPool->cMaxPhysExts);
3981 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3982 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3983 {
3984 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3985 paPhysExts[iPhysExt].apte[i] = iPte;
3986 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3987 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3988 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3989 }
3990 if (!--cMax)
3991 {
3992 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
3993 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3994 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3995 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3996 }
3997
3998 /* advance */
3999 iPhysExt = paPhysExts[iPhysExt].iNext;
4000 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4001 break;
4002 }
4003
4004 /*
4005 * Add another extent to the list.
4006 */
4007 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4008 if (!pNew)
4009 {
4010 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4011 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4012 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4013 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4014 }
4015 pNew->iNext = iPhysExtStart;
4016 pNew->aidx[0] = iShwPT;
4017 pNew->apte[0] = iPte;
4018 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4019 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4020}
4021
4022
4023/**
4024 * Add a reference to a guest physical page where extents are in use.
4025 *
4026 * @returns The new tracking data for PGMPAGE.
4027 *
4028 * @param pVM The cross context VM structure.
4029 * @param pPhysPage Pointer to the aPages entry in the ram range.
4030 * @param u16 The ram range flags (top 16-bits).
4031 * @param iShwPT The shadow page table index.
4032 * @param iPte Page table entry
4033 */
4034uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4035{
4036 PGM_LOCK_VOID(pVM);
4037 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4038 {
4039 /*
4040 * Convert to extent list.
4041 */
4042 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4043 uint16_t iPhysExt;
4044 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4045 if (pPhysExt)
4046 {
4047 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4048 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4049 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4050 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4051 pPhysExt->aidx[1] = iShwPT;
4052 pPhysExt->apte[1] = iPte;
4053 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4054 }
4055 else
4056 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4057 }
4058 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4059 {
4060 /*
4061 * Insert into the extent list.
4062 */
4063 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4064 }
4065 else
4066 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4067 PGM_UNLOCK(pVM);
4068 return u16;
4069}
4070
4071
4072/**
4073 * Clear references to guest physical memory.
4074 *
4075 * @param pPool The pool.
4076 * @param pPage The page.
4077 * @param pPhysPage Pointer to the aPages entry in the ram range.
4078 * @param iPte Shadow PTE index
4079 */
4080void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4081{
4082 PVMCC pVM = pPool->CTX_SUFF(pVM);
4083 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4084 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4085
4086 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4087 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4088 {
4089 PGM_LOCK_VOID(pVM);
4090
4091 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4092 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
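        /* Search the extent list for this (shadow page, PTE) pair; once the slot is
           cleared and the extent becomes empty it is unlinked and freed, handling the
           lonely, head and in-list cases separately below. */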
4093 do
4094 {
4095 Assert(iPhysExt < pPool->cMaxPhysExts);
4096
4097 /*
4098 * Look for the shadow page and check if it's all freed.
4099 */
4100 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4101 {
4102 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4103 && paPhysExts[iPhysExt].apte[i] == iPte)
4104 {
4105 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4106 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4107
4108 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4109 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4110 {
4111 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4112 PGM_UNLOCK(pVM);
4113 return;
4114 }
4115
4116 /* we can free the node. */
4117 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4118 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4119 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4120 {
4121 /* lonely node */
4122 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4123 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4124 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4125 }
4126 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4127 {
4128 /* head */
4129 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4130 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4131 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4132 }
4133 else
4134 {
4135 /* in list */
4136 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4137 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4138 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4139 }
4140 iPhysExt = iPhysExtNext;
4141 PGM_UNLOCK(pVM);
4142 return;
4143 }
4144 }
4145
4146 /* next */
4147 iPhysExtPrev = iPhysExt;
4148 iPhysExt = paPhysExts[iPhysExt].iNext;
4149 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4150
4151 PGM_UNLOCK(pVM);
4152 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4153 }
4154 else /* nothing to do */
4155 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4156}
4157
4158/**
4159 * Clear references to guest physical memory.
4160 *
4161 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4162 * physical address is assumed to be correct, so the linear search can be
4163 * skipped and we can assert at an earlier point.
4164 *
4165 * @param pPool The pool.
4166 * @param pPage The page.
4167 * @param HCPhys The host physical address corresponding to the guest page.
4168 * @param GCPhys The guest physical address corresponding to HCPhys.
4169 * @param iPte Shadow PTE index
4170 */
4171static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4172{
4173 /*
4174 * Lookup the page and check if it checks out before derefing it.
4175 */
4176 PVMCC pVM = pPool->CTX_SUFF(pVM);
4177 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4178 if (pPhysPage)
4179 {
4180 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4181#ifdef LOG_ENABLED
4182 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4183 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4184#endif
4185 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4186 {
4187 Assert(pPage->cPresent);
4188 Assert(pPool->cPresent);
4189 pPage->cPresent--;
4190 pPool->cPresent--;
4191 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4192 return;
4193 }
4194
4195 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4196 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4197 }
4198 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4199}
4200
4201
4202/**
4203 * Clear references to guest physical memory.
4204 *
4205 * @param pPool The pool.
4206 * @param pPage The page.
4207 * @param HCPhys The host physical address corresponding to the guest page.
4208 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4209 * @param iPte Shadow pte index
4210 */
4211void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4212{
4213 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4214
4215 /*
4216 * Try the hint first.
4217 */
4218 RTHCPHYS HCPhysHinted;
4219 PVMCC pVM = pPool->CTX_SUFF(pVM);
4220 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4221 if (pPhysPage)
4222 {
4223 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4224 Assert(HCPhysHinted);
4225 if (HCPhysHinted == HCPhys)
4226 {
4227 Assert(pPage->cPresent);
4228 Assert(pPool->cPresent);
4229 pPage->cPresent--;
4230 pPool->cPresent--;
4231 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4232 return;
4233 }
4234 }
4235 else
4236 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4237
4238 /*
4239 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4240 */
4241 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4242 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4243 while (pRam)
4244 {
4245 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4246 while (iPage-- > 0)
4247 {
4248 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4249 {
4250 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4251 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4252 Assert(pPage->cPresent);
4253 Assert(pPool->cPresent);
4254 pPage->cPresent--;
4255 pPool->cPresent--;
4256 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4257 return;
4258 }
4259 }
4260 pRam = pRam->CTX_SUFF(pNext);
4261 }
4262
4263 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4264}
4265
4266
4267/**
4268 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4269 *
4270 * @param pPool The pool.
4271 * @param pPage The page.
4272 * @param pShwPT The shadow page table (mapping of the page).
4273 * @param pGstPT The guest page table.
4274 */
4275DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4276{
4277 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
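    /* With the A20 gate disabled, bit 20 of the guest physical hint is masked off so
       the hint matches the address that was actually shadowed. */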
4278 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4279 {
4280 const X86PGUINT uPte = pShwPT->a[i].u;
4281 Assert(!(uPte & RT_BIT_32(10)));
4282 if (uPte & X86_PTE_P)
4283 {
4284 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4285 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4286 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4287 if (!pPage->cPresent)
4288 break;
4289 }
4290 }
4291}
4292
4293
4294/**
4295 * Clear references to guest physical memory in a PAE / 32-bit page table.
4296 *
4297 * @param pPool The pool.
4298 * @param pPage The page.
4299 * @param pShwPT The shadow page table (mapping of the page).
4300 * @param pGstPT The guest page table (just a half one).
4301 */
4302DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4303{
4304 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4305 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4306 {
4307 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4308 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4309 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4310 {
4311 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4312 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4313 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4314 if (!pPage->cPresent)
4315 break;
4316 }
4317 }
4318}
4319
4320
4321/**
4322 * Clear references to guest physical memory in a PAE / PAE page table.
4323 *
4324 * @param pPool The pool.
4325 * @param pPage The page.
4326 * @param pShwPT The shadow page table (mapping of the page).
4327 * @param pGstPT The guest page table.
4328 */
4329DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4330{
4331 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4332 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4333 {
4334 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4335 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4336 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4337 {
4338 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4339 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4340 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4341 if (!pPage->cPresent)
4342 break;
4343 }
4344 }
4345}
4346
4347
4348/**
4349 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4350 *
4351 * @param pPool The pool.
4352 * @param pPage The page.
4353 * @param pShwPT The shadow page table (mapping of the page).
4354 */
4355DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4356{
4357 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4358 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4359 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4360 {
4361 const X86PGUINT uPte = pShwPT->a[i].u;
4362 Assert(!(uPte & RT_BIT_32(10)));
4363 if (uPte & X86_PTE_P)
4364 {
4365 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4366 i, uPte & X86_PTE_PG_MASK, GCPhys));
4367 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4368 if (!pPage->cPresent)
4369 break;
4370 }
4371 }
4372}
4373
4374
4375/**
4376 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4377 *
4378 * @param pPool The pool.
4379 * @param pPage The page.
4380 * @param pShwPT The shadow page table (mapping of the page).
4381 */
4382DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4383{
4384 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4385 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4386 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4387 {
4388 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4389 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4390 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4391 {
4392 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4393 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4394 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4395 if (!pPage->cPresent)
4396 break;
4397 }
4398 }
4399}
4400
4401
4402/**
4403 * Clear references to shadowed pages in an EPT page table.
4404 *
4405 * @param pPool The pool.
4406 * @param pPage The page.
4407 * @param pShwPT The shadow page table (mapping of the
4408 * page).
4409 */
4410DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4411{
4412 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4413 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4414 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4415 {
4416 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4417 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4418 if (uPte & EPT_E_READ)
4419 {
4420 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4421                   i, uPte & EPT_PTE_PG_MASK, GCPhys));
4422 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4423 if (!pPage->cPresent)
4424 break;
4425 }
4426 }
4427}
4428
4429
4430/**
4431 * Clear references to shadowed pages in a 32-bit page directory.
4432 *
4433 * @param pPool The pool.
4434 * @param pPage The page.
4435 * @param pShwPD The shadow page directory (mapping of the page).
4436 */
4437DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4438{
4439 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4440 {
4441 X86PGUINT const uPde = pShwPD->a[i].u;
4442 if (uPde & X86_PDE_P)
4443 {
4444 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4445 if (pSubPage)
4446 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4447 else
4448 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4449 }
4450 }
4451}
4452
4453
4454/**
4455 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4456 *
4457 * @param pPool The pool.
4458 * @param pPage The page.
4459 * @param pShwPD The shadow page directory (mapping of the page).
4460 */
4461DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4462{
4463 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4464 {
4465 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4466 if (uPde & X86_PDE_P)
4467 {
4468#ifdef PGM_WITH_LARGE_PAGES
4469 if (uPde & X86_PDE_PS)
4470 {
4471 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4472 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4473 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4474 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4475 i);
4476 }
4477 else
4478#endif
4479 {
4480 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4481 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4482 if (pSubPage)
4483 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4484 else
4485 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4486 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4487 }
4488 }
4489 }
4490}
4491
4492
4493/**
4494 * Clear references to shadowed pages in a PAE page directory pointer table.
4495 *
4496 * @param pPool The pool.
4497 * @param pPage The page.
4498 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4499 */
4500DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4501{
4502 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4503 {
4504 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4505 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4506 if (uPdpe & X86_PDPE_P)
4507 {
4508 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4509 if (pSubPage)
4510 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4511 else
4512 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4513 }
4514 }
4515}
4516
4517
4518/**
4519 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4520 *
4521 * @param pPool The pool.
4522 * @param pPage The page.
4523 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4524 */
4525DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4526{
4527 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4528 {
4529 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4530 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4531 if (uPdpe & X86_PDPE_P)
4532 {
4533 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4534 if (pSubPage)
4535 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4536 else
4537 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4538 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4539 }
4540 }
4541}
4542
4543
4544/**
4545 * Clear references to shadowed pages in a 64-bit level 4 page table.
4546 *
4547 * @param pPool The pool.
4548 * @param pPage The page.
4549 * @param pShwPML4 The shadow PML4 / level 4 page table (mapping of the page).
4550 */
4551DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4552{
4553 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4554 {
4555 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4556 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4557 if (uPml4e & X86_PML4E_P)
4558 {
4559 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4560 if (pSubPage)
4561 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4562 else
4563 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4564 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4565 }
4566 }
4567}
4568
4569
4570/**
4571 * Clear references to shadowed pages in an EPT page directory.
4572 *
4573 * @param pPool The pool.
4574 * @param pPage The page.
4575 * @param pShwPD The shadow page directory (mapping of the page).
4576 */
4577DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4578{
4579 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4580 {
4581 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4582 Assert((uPde & UINT64_C(0xfff0000000000f80)) == 0);
4583 if (uPde & EPT_E_READ)
4584 {
4585#ifdef PGM_WITH_LARGE_PAGES
4586 if (uPde & EPT_E_LEAF)
4587 {
4588 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4589 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4590 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4591 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4592 i);
4593 }
4594 else
4595#endif
4596 {
4597 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4598 if (pSubPage)
4599 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4600 else
4601 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4602 }
4603 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4604 }
4605 }
4606}
4607
4608
4609/**
4610 * Clear references to shadowed pages in an EPT page directory pointer table.
4611 *
4612 * @param pPool The pool.
4613 * @param pPage The page.
4614 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4615 */
4616DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4617{
4618 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4619 {
4620 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4621 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
4622 if (uPdpe & EPT_E_READ)
4623 {
4624 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
4625 if (pSubPage)
4626 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4627 else
4628 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
4629 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4630 }
4631 }
4632}
4633
4634
4635/**
4636 * Clears all references made by this page.
4637 *
4638 * This includes other shadow pages and GC physical addresses.
4639 *
4640 * @param pPool The pool.
4641 * @param pPage The page.
4642 */
4643static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4644{
4645 /*
4646 * Map the shadow page and take action according to the page kind.
4647 */
4648 PVMCC pVM = pPool->CTX_SUFF(pVM);
4649 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
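    /* Page table kinds drop references to guest physical pages, while directory and
       higher level kinds drop references to the child pool pages they point to. */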
4650 switch (pPage->enmKind)
4651 {
4652 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4653 {
4654 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4655 void *pvGst;
4656 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4657 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4658 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4659 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4660 break;
4661 }
4662
4663 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4664 {
4665 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4666 void *pvGst;
4667 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4668 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4669 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4670 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4671 break;
4672 }
4673
4674 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4675 {
4676 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4677 void *pvGst;
4678 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4679 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4680 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4681 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4682 break;
4683 }
4684
4685 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4686 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4687 {
4688 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4689 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4690 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4691 break;
4692 }
4693
4694 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4695 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4696 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4697 {
4698 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4699 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4700 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4701 break;
4702 }
4703
4704 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4705 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4706 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4707 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4708 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4709 case PGMPOOLKIND_PAE_PD_PHYS:
4710 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4711 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4712 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4713 break;
4714
4715 case PGMPOOLKIND_32BIT_PD_PHYS:
4716 case PGMPOOLKIND_32BIT_PD:
4717 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4718 break;
4719
4720 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4721 case PGMPOOLKIND_PAE_PDPT:
4722 case PGMPOOLKIND_PAE_PDPT_PHYS:
4723 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4724 break;
4725
4726 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4727 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4728 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4729 break;
4730
4731 case PGMPOOLKIND_64BIT_PML4:
4732 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4733 break;
4734
4735 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4736 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4737 break;
4738
4739 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4740 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4741 break;
4742
4743 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4744 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4745 break;
4746
4747 default:
4748 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4749 }
4750
4751 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4752 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4753 ASMMemZeroPage(pvShw);
4754 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4755 pPage->fZeroed = true;
4756 Assert(!pPage->cPresent);
4757 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4758}
4759
4760
4761/**
4762 * Flushes a pool page.
4763 *
4764 * This moves the page to the free list after removing all user references to it.
4765 *
4766 * @returns VBox status code.
4767 * @retval VINF_SUCCESS on success.
4768 * @param pPool The pool.
4769 * @param pPage The shadow page.
4770 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4771 */
4772int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4773{
4774 PVMCC pVM = pPool->CTX_SUFF(pVM);
4775 bool fFlushRequired = false;
4776
4777 int rc = VINF_SUCCESS;
4778 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4779 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4780 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4781
4782 /*
4783 * Reject any attempts at flushing any of the special root pages (shall
4784 * not happen).
4785 */
4786 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4787 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4788 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4789 VINF_SUCCESS);
4790
4791 PGM_LOCK_VOID(pVM);
4792
4793 /*
4794 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4795 */
4796 if (pgmPoolIsPageLocked(pPage))
4797 {
4798 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4799 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4800 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4801 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4802 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4803 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4804 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4805 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4806 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4807 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4808 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4809 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4810 PGM_UNLOCK(pVM);
4811 return VINF_SUCCESS;
4812 }
4813
4814 /*
4815 * Mark the page as being in need of an ASMMemZeroPage().
4816 */
4817 pPage->fZeroed = false;
4818
4819#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4820 if (pPage->fDirty)
4821 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4822#endif
4823
4824 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4825 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4826 fFlushRequired = true;
4827
4828 /*
4829 * Clear the page.
4830 */
4831 pgmPoolTrackClearPageUsers(pPool, pPage);
4832 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4833 pgmPoolTrackDeref(pPool, pPage);
4834 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4835
4836 /*
4837 * Flush it from the cache.
4838 */
4839 pgmPoolCacheFlushPage(pPool, pPage);
4840
4841 /*
4842 * Deregister the monitoring.
4843 */
4844 if (pPage->fMonitored)
4845 rc = pgmPoolMonitorFlush(pPool, pPage);
4846
4847 /*
4848 * Free the page.
4849 */
4850 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4851 pPage->iNext = pPool->iFreeHead;
4852 pPool->iFreeHead = pPage->idx;
4853 pPage->enmKind = PGMPOOLKIND_FREE;
4854 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4855 pPage->GCPhys = NIL_RTGCPHYS;
4856 pPage->fReusedFlushPending = false;
4857
4858 pPool->cUsedPages--;
4859
4860 /* Flush the TLBs of all VCPUs if required. */
4861 if ( fFlushRequired
4862 && fFlush)
4863 {
4864 PGM_INVL_ALL_VCPU_TLBS(pVM);
4865 }
4866
4867 PGM_UNLOCK(pVM);
4868 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4869 return rc;
4870}
4871
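/*
 * Illustrative sketch only (not built): how a caller might flush a shadow
 * page it no longer needs.  The pPool and pPage variables are assumed to be
 * valid for the example; fFlush=true is the normal case and lets the
 * function invalidate all VCPU TLBs when the page still had users.
 */
#if 0 /* illustrative example */
    int rcFlush = pgmPoolFlushPage(pPool, pPage, true /*fFlush*/);
    AssertRC(rcFlush); /* the page is now on the free list (or was a rejected root/locked page) */
#endif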
4872
4873/**
4874 * Frees a usage of a pool page.
4875 *
4876 * The caller is responsible for updating the user table so that it no longer
4877 * references the shadow page.
4878 *
4879 * @param pPool The pool.
4880 * @param pPage The shadow page.
4881 * @param iUser The shadow page pool index of the user table.
4882 * NIL_PGMPOOL_IDX for root pages.
4883 * @param iUserTable The index into the user table (shadowed). Ignored if
4884 * root page.
4885 */
4886void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4887{
4888 PVMCC pVM = pPool->CTX_SUFF(pVM);
4889
4890 STAM_PROFILE_START(&pPool->StatFree, a);
4891 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4892 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4893 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
4894
4895 PGM_LOCK_VOID(pVM);
4896 if (iUser != NIL_PGMPOOL_IDX)
4897 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4898 if (!pPage->fCached)
4899 pgmPoolFlushPage(pPool, pPage);
4900 PGM_UNLOCK(pVM);
4901 STAM_PROFILE_STOP(&pPool->StatFree, a);
4902}
4903
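/*
 * Illustrative sketch only (not built): releasing a usage of a shadow page
 * after the caller has already cleared the entry in its own shadow table
 * that referenced it.  pPool, pPage, iMyPoolIdx and iEntry are assumed
 * example values, not code from this file.
 */
#if 0 /* illustrative example */
    pgmPoolFreeByPage(pPool, pPage, iMyPoolIdx /*iUser*/, iEntry /*iUserTable*/);
#endif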
4904
4905/**
4906 * Makes one or more free pages available.
4907 *
4908 * @returns VBox status code.
4909 * @retval VINF_SUCCESS on success.
4910 *
4911 * @param pPool The pool.
4912 * @param enmKind Page table kind
4913 * @param iUser The user of the page.
4914 */
4915static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4916{
4917 PVMCC pVM = pPool->CTX_SUFF(pVM);
4918 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
4919 NOREF(enmKind);
4920
4921 /*
4922 * If the pool isn't fully grown yet, expand it.
4923 */
4924 if (pPool->cCurPages < pPool->cMaxPages)
4925 {
4926 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4927#ifdef IN_RING3
4928 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
4929#else
4930 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
4931#endif
4932 if (RT_FAILURE(rc))
4933 return rc;
4934 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4935 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4936 return VINF_SUCCESS;
4937 }
4938
4939 /*
4940 * Free one cached page.
4941 */
4942 return pgmPoolCacheFreeOne(pPool, iUser);
4943}
4944
4945
4946/**
4947 * Allocates a page from the pool.
4948 *
4949 * This page may actually be a cached page and not in need of any processing
4950 * on the caller's part.
4951 *
4952 * @returns VBox status code.
4953 * @retval VINF_SUCCESS if a NEW page was allocated.
4954 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4955 *
4956 * @param pVM The cross context VM structure.
4957 * @param GCPhys The GC physical address of the page we're going to shadow.
4958 * For 4MB and 2MB PD entries, it's the first address the
4959 * shadow PT is covering.
4960 * @param enmKind The kind of mapping.
4961 * @param enmAccess Access type for the mapping (only relevant for big pages)
4962 * @param fA20Enabled Whether the A20 gate is enabled or not.
4963 * @param iUser The shadow page pool index of the user table. Root
4964 * pages should pass NIL_PGMPOOL_IDX.
4965 * @param iUserTable The index into the user table (shadowed). Ignored for
4966 * root pages (iUser == NIL_PGMPOOL_IDX).
4967 * @param fLockPage Lock the page
4968 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4969 */
4970int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
4971 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
4972{
4973 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4974 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4975 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4976 *ppPage = NULL;
4977 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4978 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4979 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4980
4981 PGM_LOCK_VOID(pVM);
4982
4983 if (pPool->fCacheEnabled)
4984 {
4985 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
4986 if (RT_SUCCESS(rc2))
4987 {
4988 if (fLockPage)
4989 pgmPoolLockPage(pPool, *ppPage);
4990 PGM_UNLOCK(pVM);
4991 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4992 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4993 return rc2;
4994 }
4995 }
4996
4997 /*
4998 * Allocate a new one.
4999 */
5000 int rc = VINF_SUCCESS;
5001 uint16_t iNew = pPool->iFreeHead;
5002 if (iNew == NIL_PGMPOOL_IDX)
5003 {
5004 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5005 if (RT_FAILURE(rc))
5006 {
5007 PGM_UNLOCK(pVM);
5008 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5009 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5010 return rc;
5011 }
5012 iNew = pPool->iFreeHead;
5013 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5014 }
5015
5016 /* unlink the free head */
5017 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5018 pPool->iFreeHead = pPage->iNext;
5019 pPage->iNext = NIL_PGMPOOL_IDX;
5020
5021 /*
5022 * Initialize it.
5023 */
5024 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5025 pPage->enmKind = enmKind;
5026 pPage->enmAccess = enmAccess;
5027 pPage->GCPhys = GCPhys;
5028 pPage->fA20Enabled = fA20Enabled;
5029 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5030 pPage->fMonitored = false;
5031 pPage->fCached = false;
5032 pPage->fDirty = false;
5033 pPage->fReusedFlushPending = false;
5034 pPage->cModifications = 0;
5035 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5036 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5037 pPage->cPresent = 0;
5038 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5039 pPage->idxDirtyEntry = 0;
5040 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5041 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5042 pPage->cLastAccessHandler = 0;
5043 pPage->cLocked = 0;
5044# ifdef VBOX_STRICT
5045 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5046# endif
5047
5048 /*
5049 * Insert into the tracking and cache. If this fails, free the page.
5050 */
5051 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5052 if (RT_FAILURE(rc3))
5053 {
5054 pPool->cUsedPages--;
5055 pPage->enmKind = PGMPOOLKIND_FREE;
5056 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5057 pPage->GCPhys = NIL_RTGCPHYS;
5058 pPage->iNext = pPool->iFreeHead;
5059 pPool->iFreeHead = pPage->idx;
5060 PGM_UNLOCK(pVM);
5061 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5062 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5063 return rc3;
5064 }
5065
5066 /*
5067 * Commit the allocation, clear the page and return.
5068 */
5069#ifdef VBOX_WITH_STATISTICS
5070 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5071 pPool->cUsedPagesHigh = pPool->cUsedPages;
5072#endif
5073
5074 if (!pPage->fZeroed)
5075 {
5076 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5077 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5078 ASMMemZeroPage(pv);
5079 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5080 }
5081
5082 *ppPage = pPage;
5083 if (fLockPage)
5084 pgmPoolLockPage(pPool, pPage);
5085 PGM_UNLOCK(pVM);
5086 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5087 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5088 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5089 return rc;
5090}
5091
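/*
 * Illustrative sketch only (not built): allocating a shadow page table of a
 * "FOR_PHYS" kind.  GCPhysGuest, iUserIdx and iPdeIndex are assumed example
 * values; VINF_PGM_CACHED_PAGE tells the caller the returned page came from
 * the cache rather than being newly zeroed.
 */
#if 0 /* illustrative example */
    PPGMPOOLPAGE pNewPage = NULL;
    int rc = pgmPoolAlloc(pVM, GCPhysGuest, PGMPOOLKIND_PAE_PT_FOR_PHYS, PGMPOOLACCESS_DONTCARE,
                          true /*fA20Enabled*/, iUserIdx, iPdeIndex, false /*fLockPage*/, &pNewPage);
    if (RT_SUCCESS(rc) && rc != VINF_PGM_CACHED_PAGE)
    {
        /* Map the new, zeroed page and fill in its entries as needed. */
        void *pvNew = PGMPOOL_PAGE_2_PTR(pVM, pNewPage);
        NOREF(pvNew);
    }
#endif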
5092
5093/**
5094 * Frees a usage of a pool page.
5095 *
5096 * @param pVM The cross context VM structure.
5097 * @param HCPhys The HC physical address of the shadow page.
5098 * @param iUser The shadow page pool index of the user table.
5099 * NIL_PGMPOOL_IDX if root page.
5100 * @param iUserTable The index into the user table (shadowed). Ignored if
5101 * root page.
5102 */
5103void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5104{
5105 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5106 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5107 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5108}
5109
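/*
 * Illustrative sketch only (not built): the HCPhys based variant is useful
 * when the caller only has the host physical address taken from a shadow
 * paging entry.  HCPhysShwPT, iUserIdx and iPdeIndex are assumed example
 * values; pgmPoolGetPage masks off the low bits itself.
 */
#if 0 /* illustrative example */
    pgmPoolFree(pVM, HCPhysShwPT, iUserIdx /*iUser*/, iPdeIndex /*iUserTable*/);
#endif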
5110
5111/**
5112 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5113 *
5114 * @returns Pointer to the shadow page structure.
5115 * @param pPool The pool.
5116 * @param HCPhys The HC physical address of the shadow page.
5117 */
5118PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5119{
5120 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5121
5122 /*
5123 * Look up the page.
5124 */
5125 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5126
5127 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5128 return pPage;
5129}
5130
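/*
 * Illustrative sketch only (not built): looking up the pool page structure
 * for a shadow page from its host physical address while owning the PGM
 * lock.  HCPhysShw is an assumed example value.
 */
#if 0 /* illustrative example */
    PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, HCPhysShw);
    Log(("shadow page: idx=%d kind=%s\n", pShwPage->idx, pgmPoolPoolKindToStr(pShwPage->enmKind)));
#endif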
5131
5132/**
5133 * Internal worker for finding a page for debugging purposes, no assertions.
5134 *
5135 * @returns Pointer to the shadow page structure. NULL if not found.
5136 * @param pPool The pool.
5137 * @param HCPhys The HC physical address of the shadow page.
5138 */
5139PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5140{
5141 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5142 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5143}
5144
5145
5146/**
5147 * Internal worker for PGM_HCPHYS_2_PTR.
5148 *
5149 * @returns VBox status code.
5150 * @param pVM The cross context VM structure.
5151 * @param HCPhys The HC physical address of the shadow page.
5152 * @param ppv Where to return the address.
5153 */
5154int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5155{
5156 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5157 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5158 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5159 VERR_PGM_POOL_GET_PAGE_FAILED);
5160 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5161 return VINF_SUCCESS;
5162}
5163
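/*
 * Illustrative sketch only (not built): translating the host physical
 * address of a shadow page into a usable pointer; the page offset part of
 * HCPhysShw (an assumed example value) is preserved in the result.
 */
#if 0 /* illustrative example */
    void *pvShwEntry = NULL;
    int rc = pgmPoolHCPhys2Ptr(pVM, HCPhysShw, &pvShwEntry);
    AssertRCReturn(rc, rc);
#endif
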
5164#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5165
5166/**
5167 * Flush the specified page if present
5168 *
5169 * @param pVM The cross context VM structure.
5170 * @param GCPhys Guest physical address of the page to flush
5171 */
5172void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5173{
5174 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5175
5176 VM_ASSERT_EMT(pVM);
5177
5178 /*
5179 * Look up the GCPhys in the hash.
5180 */
5181 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5182 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5183 if (i == NIL_PGMPOOL_IDX)
5184 return;
5185
5186 do
5187 {
5188 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5189 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5190 {
5191 switch (pPage->enmKind)
5192 {
5193 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5194 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5195 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5196 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5197 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5198 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5199 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5200 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5201 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5202 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5203 case PGMPOOLKIND_64BIT_PML4:
5204 case PGMPOOLKIND_32BIT_PD:
5205 case PGMPOOLKIND_PAE_PDPT:
5206 {
5207 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5208# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5209 if (pPage->fDirty)
5210 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5211 else
5212# endif
5213 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5214 Assert(!pgmPoolIsPageLocked(pPage));
5215 pgmPoolMonitorChainFlush(pPool, pPage);
5216 return;
5217 }
5218
5219 /* ignore, no monitoring. */
5220 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5221 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5222 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5223 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5224 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5225 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5226 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5227 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5228 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5229 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5230 case PGMPOOLKIND_ROOT_NESTED:
5231 case PGMPOOLKIND_PAE_PD_PHYS:
5232 case PGMPOOLKIND_PAE_PDPT_PHYS:
5233 case PGMPOOLKIND_32BIT_PD_PHYS:
5234 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5235 break;
5236
5237 default:
5238 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5239 }
5240 }
5241
5242 /* next */
5243 i = pPage->iNext;
5244 } while (i != NIL_PGMPOOL_IDX);
5245 return;
5246}
5247
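/*
 * Illustrative sketch only (not built, ring-3): forcing any pool page that
 * shadows the guest page at GCPhysGuest (an assumed example value) to be
 * flushed; the call must be made on the EMT.
 */
# if 0 /* illustrative example */
    pgmPoolFlushPageByGCPhys(pVM, GCPhysGuest);
# endif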
5248
5249/**
5250 * Resets the shadow paging state of a CPU on hot plugging.
5251 *
5252 * @param pVM The cross context VM structure.
5253 * @param pVCpu The cross context virtual CPU structure.
5254 */
5255void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5256{
5257 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5258
5259 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5260 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5261 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5262}
5263
5264
5265/**
5266 * Flushes the entire cache.
5267 *
5268 * It will assert a global CR3 flush (FF) and assumes that the caller is aware of
5269 * this and will execute the CR3 flush.
5270 *
5271 * @param pVM The cross context VM structure.
5272 */
5273void pgmR3PoolReset(PVM pVM)
5274{
5275 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5276
5277 PGM_LOCK_ASSERT_OWNER(pVM);
5278 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5279 LogFlow(("pgmR3PoolReset:\n"));
5280
5281 /*
5282 * If there are no pages in the pool, there is nothing to do.
5283 */
5284 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5285 {
5286 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5287 return;
5288 }
5289
5290 /*
5291 * Exit the shadow mode since we're going to clear everything,
5292 * including the root page.
5293 */
5294 VMCC_FOR_EACH_VMCPU(pVM)
5295 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5296 VMCC_FOR_EACH_VMCPU_END(pVM);
5297
5298
5299 /*
5300 * Nuke the free list and reinsert all pages into it.
5301 */
5302 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5303 {
5304 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5305
5306 if (pPage->fMonitored)
5307 pgmPoolMonitorFlush(pPool, pPage);
5308 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5309 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5310 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5311 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5312 pPage->GCPhys = NIL_RTGCPHYS;
5313 pPage->enmKind = PGMPOOLKIND_FREE;
5314 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5315 Assert(pPage->idx == i);
5316 pPage->iNext = i + 1;
5317 pPage->fA20Enabled = true;
5318 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5319 pPage->fSeenNonGlobal = false;
5320 pPage->fMonitored = false;
5321 pPage->fDirty = false;
5322 pPage->fCached = false;
5323 pPage->fReusedFlushPending = false;
5324 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5325 pPage->cPresent = 0;
5326 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5327 pPage->cModifications = 0;
5328 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5329 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5330 pPage->idxDirtyEntry = 0;
5331 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5332 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5333 pPage->cLastAccessHandler = 0;
5334 pPage->cLocked = 0;
5335# ifdef VBOX_STRICT
5336 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5337# endif
5338 }
5339 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5340 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5341 pPool->cUsedPages = 0;
5342
5343 /*
5344 * Zap and reinitialize the user records.
5345 */
5346 pPool->cPresent = 0;
5347 pPool->iUserFreeHead = 0;
5348 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5349 const unsigned cMaxUsers = pPool->cMaxUsers;
5350 for (unsigned i = 0; i < cMaxUsers; i++)
5351 {
5352 paUsers[i].iNext = i + 1;
5353 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5354 paUsers[i].iUserTable = 0xfffffffe;
5355 }
5356 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5357
5358 /*
5359 * Clear all the GCPhys links and rebuild the phys ext free list.
5360 */
5361 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5362 pRam;
5363 pRam = pRam->CTX_SUFF(pNext))
5364 {
5365 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5366 while (iPage-- > 0)
5367 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5368 }
5369
5370 pPool->iPhysExtFreeHead = 0;
5371 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5372 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5373 for (unsigned i = 0; i < cMaxPhysExts; i++)
5374 {
5375 paPhysExts[i].iNext = i + 1;
5376 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5377 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5378 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5379 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5380 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5381 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5382 }
5383 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5384
5385 /*
5386 * Just zap the modified list.
5387 */
5388 pPool->cModifiedPages = 0;
5389 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5390
5391 /*
5392 * Clear the GCPhys hash and the age list.
5393 */
5394 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5395 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5396 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5397 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5398
5399# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5400 /* Clear all dirty pages. */
5401 pPool->idxFreeDirtyPage = 0;
5402 pPool->cDirtyPages = 0;
5403 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5404 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5405# endif
5406
5407 /*
5408 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5409 */
5410 VMCC_FOR_EACH_VMCPU(pVM)
5411 {
5412 /*
5413 * Re-enter the shadowing mode and assert Sync CR3 FF.
5414 */
5415 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5416 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5417 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5418 }
5419 VMCC_FOR_EACH_VMCPU_END(pVM);
5420
5421 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5422}
5423
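/*
 * Illustrative sketch only (not built, ring-3): a full pool reset must be
 * done while owning the PGM lock, and the caller is expected to service the
 * VMCPU_FF_PGM_SYNC_CR3 / VMCPU_FF_TLB_FLUSH forced actions it raises.
 */
# if 0 /* illustrative example */
    PGM_LOCK_VOID(pVM);
    pgmR3PoolReset(pVM);
    PGM_UNLOCK(pVM);
# endif
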
5424#endif /* IN_RING3 */
5425
5426#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5427/**
5428 * Stringifies a PGMPOOLKIND value.
5429 */
5430static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5431{
5432 switch ((PGMPOOLKIND)enmKind)
5433 {
5434 case PGMPOOLKIND_INVALID:
5435 return "PGMPOOLKIND_INVALID";
5436 case PGMPOOLKIND_FREE:
5437 return "PGMPOOLKIND_FREE";
5438 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5439 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5440 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5441 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5442 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5443 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5444 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5445 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5446 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5447 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5448 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5449 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5450 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5451 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5452 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5453 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5454 case PGMPOOLKIND_32BIT_PD:
5455 return "PGMPOOLKIND_32BIT_PD";
5456 case PGMPOOLKIND_32BIT_PD_PHYS:
5457 return "PGMPOOLKIND_32BIT_PD_PHYS";
5458 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5459 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5460 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5461 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5462 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5463 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5464 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5465 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5466 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5467 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5468 case PGMPOOLKIND_PAE_PD_PHYS:
5469 return "PGMPOOLKIND_PAE_PD_PHYS";
5470 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5471 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5472 case PGMPOOLKIND_PAE_PDPT:
5473 return "PGMPOOLKIND_PAE_PDPT";
5474 case PGMPOOLKIND_PAE_PDPT_PHYS:
5475 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5476 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5477 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5478 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5479 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5480 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5481 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5482 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5483 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5484 case PGMPOOLKIND_64BIT_PML4:
5485 return "PGMPOOLKIND_64BIT_PML4";
5486 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5487 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5488 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5489 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5490 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5491 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5492 case PGMPOOLKIND_ROOT_NESTED:
5493 return "PGMPOOLKIND_ROOT_NESTED";
5494 }
5495 return "Unknown kind!";
5496}
5497#endif /* LOG_ENABLED || VBOX_STRICT */
5498