VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@94612

Last change on this file since 94612 was 93931, checked in by vboxsync, 3 years ago

VMM,VMMDev,scm,VBox/param.h: Don't use PAGE_BASE_GC_MASK and friends in the VMM. bugref:9898

1/* $Id: PGMAllPool.cpp 93931 2022-02-24 16:02:00Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
24#include <VBox/vmm/pgm.h>
25#include <VBox/vmm/mm.h>
26#include <VBox/vmm/em.h>
27#include <VBox/vmm/cpum.h>
28#include "PGMInternal.h"
29#include <VBox/vmm/vmcc.h>
30#include "PGMInline.h"
31#include <VBox/disopcode.h>
32#include <VBox/vmm/hm_vmx.h>
33
34#include <VBox/log.h>
35#include <VBox/err.h>
36#include <iprt/asm.h>
37#include <iprt/string.h>
38
39
40/*********************************************************************************************************************************
41* Internal Functions *
42*********************************************************************************************************************************/
43RT_C_DECLS_BEGIN
44#if 0 /* unused */
45DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
46DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
47#endif /* unused */
48static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
49static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
51static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
53static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
54#endif
55#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
56static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
57#endif
58
59int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
60PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
61void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
62void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);
63
64RT_C_DECLS_END
65
66
67#if 0 /* unused */
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86#endif /* unused */
87
88
89/**
90 * Flushes a chain of pages sharing the same access monitor.
91 *
92 * @param pPool The pool.
93 * @param pPage A page in the chain.
94 */
95void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
96{
97 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
98
99 /*
100 * Find the list head.
101 */
102 uint16_t idx = pPage->idx;
103 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
104 {
105 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
106 {
107 idx = pPage->iMonitoredPrev;
108 Assert(idx != pPage->idx);
109 pPage = &pPool->aPages[idx];
110 }
111 }
112
113 /*
114 * Iterate the list flushing each shadow page.
115 */
116 for (;;)
117 {
118 idx = pPage->iMonitoredNext;
119 Assert(idx != pPage->idx);
120 if (pPage->idx >= PGMPOOL_IDX_FIRST)
121 {
122 int rc2 = pgmPoolFlushPage(pPool, pPage);
123 AssertRC(rc2);
124 }
125 /* next */
126 if (idx == NIL_PGMPOOL_IDX)
127 break;
128 pPage = &pPool->aPages[idx];
129 }
130}
131
132
133/**
134 * Wrapper for reading the guest entry being modified, using the current context mapping when available.
135 *
136 * @returns VBox status code suitable for scheduling.
137 * @param pVM The cross context VM structure.
138 * @param pvDst Destination address
139 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
140 * on the context (e.g. \#PF in R0 & RC).
141 * @param GCPhysSrc The source guest physical address.
142 * @param cb Size of data to read
143 */
144DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
145{
146#if defined(IN_RING3)
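/* In ring-3 we can read through the supplied mapping: align pvSrc down to the
   entry size and copy the (naturally aligned) entry directly. */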
147 NOREF(pVM); NOREF(GCPhysSrc);
148 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
149 return VINF_SUCCESS;
150#else
151 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
152 NOREF(pvSrc);
153 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
154#endif
155}
156
157
158/**
159 * Process shadow entries before they are changed by the guest.
160 *
161 * For PT entries we will clear them. For PD and higher-level entries, the
162 * affected shadow entries are freed and cleared.
163 *
164 * @param pVCpu The cross context virtual CPU structure.
165 * @param pPool The pool.
166 * @param pPage The head page.
167 * @param GCPhysFault The guest physical fault address.
168 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
169 * depending on the context (e.g. \#PF in R0 & RC).
170 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
171 */
172static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
173 void const *pvAddress, unsigned cbWrite)
174{
175 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
176 const unsigned off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
177 PVMCC pVM = pPool->CTX_SUFF(pVM);
178 NOREF(pVCpu);
179
180 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
181 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
182
183 for (;;)
184 {
185 union
186 {
187 void *pv;
188 PX86PT pPT;
189 PPGMSHWPTPAE pPTPae;
190 PX86PD pPD;
191 PX86PDPAE pPDPae;
192 PX86PDPT pPDPT;
193 PX86PML4 pPML4;
194 } uShw;
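/* Kind-specific views of the shadow page; the switch below selects the right one. */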
195
196 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
197 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
198
199 uShw.pv = NULL;
200 switch (pPage->enmKind)
201 {
202 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
203 {
204 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
205 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
206 const unsigned iShw = off / sizeof(X86PTE);
207 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
208 X86PGUINT const uPde = uShw.pPT->a[iShw].u;
209 if (uPde & X86_PTE_P)
210 {
211 X86PTE GstPte;
212 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
213 AssertRC(rc);
214 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
215 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
216 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
217 }
218 break;
219 }
220
221 /* page/2 sized */
222 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
223 {
224 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
225 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
226 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
227 {
228 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
229 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
230 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
231 {
232 X86PTE GstPte;
233 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
234 AssertRC(rc);
235
236 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PG_MASK));
237 pgmPoolTracDerefGCPhysHint(pPool, pPage,
238 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
239 GstPte.u & X86_PTE_PG_MASK,
240 iShw);
241 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
242 }
243 }
244 break;
245 }
246
247 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
248 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
249 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
250 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
251 {
252 unsigned iGst = off / sizeof(X86PDE);
253 unsigned iShwPdpt = iGst / 256;
254 unsigned iShw = (iGst % 256) * 2;
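/* The 1024-entry 32-bit guest PD is shadowed by four PAE PDs (one per
   PGMPOOLKIND_PAE_PD0..3_FOR_32BIT_PD kind); each guest PDE expands to two
   PAE PDEs, hence iShwPdpt = iGst / 256 and iShw = (iGst % 256) * 2. */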
255 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
256
257 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
258 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
259 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
260 {
261 for (unsigned i = 0; i < 2; i++)
262 {
263 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
264 if (uPde & X86_PDE_P)
265 {
266 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
267 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
268 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
269 }
270
271 /* paranoia / a bit assumptive. */
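/* A misaligned write may spill into the next guest PDE; clear the
   corresponding second shadow entry as well. */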
272 if ( (off & 3)
273 && (off & 3) + cbWrite > 4)
274 {
275 const unsigned iShw2 = iShw + 2 + i;
276 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
277 {
278 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
279 if (uPde2 & X86_PDE_P)
280 {
281 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
282 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
283 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
284 }
285 }
286 }
287 }
288 }
289 break;
290 }
291
292 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
293 {
294 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
295 const unsigned iShw = off / sizeof(X86PTEPAE);
296 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
297 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
298 {
299 X86PTEPAE GstPte;
300 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
301 AssertRC(rc);
302
303 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
304 pgmPoolTracDerefGCPhysHint(pPool, pPage,
305 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
306 GstPte.u & X86_PTE_PAE_PG_MASK,
307 iShw);
308 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
309 }
310
311 /* paranoia / a bit assumptive. */
312 if ( (off & 7)
313 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
314 {
315 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
316 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
317
318 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
319 {
320 X86PTEPAE GstPte;
321 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
322 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
323 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
324 AssertRC(rc);
325 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
326 pgmPoolTracDerefGCPhysHint(pPool, pPage,
327 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
328 GstPte.u & X86_PTE_PAE_PG_MASK,
329 iShw2);
330 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
331 }
332 }
333 break;
334 }
335
336 case PGMPOOLKIND_32BIT_PD:
337 {
338 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
339 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
340
341 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
342 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
343 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
344 if (uPde & X86_PDE_P)
345 {
346 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
347 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
348 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
349 }
350
351 /* paranoia / a bit assumptive. */
352 if ( (off & 3)
353 && (off & 3) + cbWrite > sizeof(X86PTE))
354 {
355 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
356 if ( iShw2 != iShw
357 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
358 {
359 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
360 if (uPde2 & X86_PDE_P)
361 {
362 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
363 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
364 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
365 }
366 }
367 }
368#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
369 if ( uShw.pPD->a[iShw].n.u1Present
370 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
371 {
372 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
373 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
374 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
375 }
376#endif
377 break;
378 }
379
380 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
381 {
382 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
383 const unsigned iShw = off / sizeof(X86PDEPAE);
384 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
385
386 /*
387 * Causes trouble when the guest uses a PDE to refer to the whole page table level
388 * structure. (Invalidate here; faults later on when it tries to change the page
389 * table entries -> recheck; probably only applies to the RC case.)
390 */
391 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
392 if (uPde & X86_PDE_P)
393 {
394 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
395 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
396 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
397 }
398
399 /* paranoia / a bit assumptive. */
400 if ( (off & 7)
401 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
402 {
403 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
404 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
405
406 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
407 if (uPde2 & X86_PDE_P)
408 {
409 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
410 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
411 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
412 }
413 }
414 break;
415 }
416
417 case PGMPOOLKIND_PAE_PDPT:
418 {
419 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
420 /*
421 * Hopefully this doesn't happen very often:
422 * - touching unused parts of the page
423 * - messing with the bits of pd pointers without changing the physical address
424 */
425 /* PDPT roots are not page aligned; 32 byte only! */
426 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
427
428 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
429 const unsigned iShw = offPdpt / sizeof(X86PDPE);
430 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
431 {
432 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
433 if (uPdpe & X86_PDPE_P)
434 {
435 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
436 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
437 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
438 }
439
440 /* paranoia / a bit assumptive. */
441 if ( (offPdpt & 7)
442 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
443 {
444 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
445 if ( iShw2 != iShw
446 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
447 {
448 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
449 if (uPdpe2 & X86_PDPE_P)
450 {
451 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
452 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
453 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
454 }
455 }
456 }
457 }
458 break;
459 }
460
461 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
462 {
463 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
464 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
465 const unsigned iShw = off / sizeof(X86PDEPAE);
466 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
467 if (uPde & X86_PDE_P)
468 {
469 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
470 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
471 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
472 }
473
474 /* paranoia / a bit assumptive. */
475 if ( (off & 7)
476 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
477 {
478 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
479 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
480 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
481 if (uPde2 & X86_PDE_P)
482 {
483 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
484 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
485 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
486 }
487 }
488 break;
489 }
490
491 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
492 {
493 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
494 /*
495 * Hopefully this doesn't happen very often:
496 * - messing with the bits of pd pointers without changing the physical address
497 */
498 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
499 const unsigned iShw = off / sizeof(X86PDPE);
500 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
501 if (uPdpe & X86_PDPE_P)
502 {
503 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
504 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
505 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
506 }
507 /* paranoia / a bit assumptive. */
508 if ( (off & 7)
509 && (off & 7) + cbWrite > sizeof(X86PDPE))
510 {
511 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
512 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
513 if (uPdpe2 & X86_PDPE_P)
514 {
515 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
516 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
517 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
518 }
519 }
520 break;
521 }
522
523 case PGMPOOLKIND_64BIT_PML4:
524 {
525 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
526 /*
527 * Hopefully this doesn't happen very often:
528 * - messing with the bits of pd pointers without changing the physical address
529 */
530 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
531 const unsigned iShw = off / sizeof(X86PDPE);
532 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
533 if (uPml4e & X86_PML4E_P)
534 {
535 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
536 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
537 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
538 }
539 /* paranoia / a bit assumptive. */
540 if ( (off & 7)
541 && (off & 7) + cbWrite > sizeof(X86PDPE))
542 {
543 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
544 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
545 if (uPml4e2 & X86_PML4E_P)
546 {
547 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
548 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
549 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
550 }
551 }
552 break;
553 }
554
555 default:
556 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
557 }
558 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
559
560 /* next */
561 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
562 return;
563 pPage = &pPool->aPages[pPage->iMonitoredNext];
564 }
565}
566
567#ifndef IN_RING3
568
569/**
570 * Checks if an access could be a fork operation in progress.
571 *
572 * Meaning that the guest is setting up the parent process for Copy-On-Write.
573 *
574 * @returns true if it's likely that we're forking, otherwise false.
575 * @param pPool The pool.
576 * @param pDis The disassembled instruction.
577 * @param offFault The access offset.
578 */
579DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
580{
581 /*
582 * i386 linux is using btr to clear X86_PTE_RW.
583 * The functions involved are (2.6.16 source inspection):
584 * clear_bit
585 * ptep_set_wrprotect
586 * copy_one_pte
587 * copy_pte_range
588 * copy_pmd_range
589 * copy_pud_range
590 * copy_page_range
591 * dup_mmap
592 * dup_mm
593 * copy_mm
594 * copy_process
595 * do_fork
596 */
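/* The R/W bit lives in the low dword of a (PAE) PTE, so a btr with
   !(offFault & 4) is consistent with the guest clearing X86_PTE_RW
   (see the @todo below). */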
597 if ( pDis->pCurInstr->uOpcode == OP_BTR
598 && !(offFault & 4)
599 /** @todo Validate that the bit index is X86_PTE_RW. */
600 )
601 {
602 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
603 return true;
604 }
605 return false;
606}
607
608
609/**
610 * Determine whether the page is likely to have been reused.
611 *
612 * @returns true if we consider the page as being reused for a different purpose.
613 * @returns false if we consider it to still be a paging page.
614 * @param pVM The cross context VM structure.
615 * @param pVCpu The cross context virtual CPU structure.
616 * @param pRegFrame Trap register frame.
617 * @param pDis The disassembly info for the faulting instruction.
618 * @param pvFault The fault address.
619 * @param pPage The pool page being accessed.
620 *
621 * @remark The REP prefix check is left to the caller because of STOSD/W.
622 */
623DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
624 PPGMPOOLPAGE pPage)
625{
626 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
627 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
628 if (pPage->cLocked)
629 {
630 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused, because it's locked!\n", pvFault, pPage));
631 return false;
632 }
633
634 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
635 if ( HMHasPendingIrq(pVM)
636 && pRegFrame->rsp - pvFault < 32)
637 {
638 /* Fault caused by stack writes while trying to inject an interrupt event. */
639 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
640 return true;
641 }
642
643 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
644
645 /* Non-supervisor mode write means it's used for something else. */
646 if (CPUMGetGuestCPL(pVCpu) == 3)
647 return true;
648
649 switch (pDis->pCurInstr->uOpcode)
650 {
651 /* call implies the actual push of the return address faulted */
652 case OP_CALL:
653 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
654 return true;
655 case OP_PUSH:
656 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
657 return true;
658 case OP_PUSHF:
659 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
660 return true;
661 case OP_PUSHA:
662 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
663 return true;
664 case OP_FXSAVE:
665 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
666 return true;
667 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
668 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
669 return true;
670 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
671 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
672 return true;
673 case OP_MOVSWD:
674 case OP_STOSWD:
675 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
676 && pRegFrame->rcx >= 0x40
677 )
678 {
679 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
680
681 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
682 return true;
683 }
684 break;
685
686 default:
687 /*
688 * Anything having ESP on the left side means stack writes.
689 */
690 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
691 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
692 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
693 {
694 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
695 return true;
696 }
697 break;
698 }
699
700 /*
701 * Page table updates are very unlikely to cross page boundaries,
702 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
703 */
704 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
705 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
706 {
707 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
708 return true;
709 }
710
711 /*
712 * Nobody does an unaligned 8-byte write to a page table, right?
713 */
714 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
715 {
716 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
717 return true;
718 }
719
720 return false;
721}
722
723
724/**
725 * Flushes the page being accessed.
726 *
727 * @returns VBox status code suitable for scheduling.
728 * @param pVM The cross context VM structure.
729 * @param pVCpu The cross context virtual CPU structure.
730 * @param pPool The pool.
731 * @param pPage The pool page (head).
732 * @param pDis The disassembly of the write instruction.
733 * @param pRegFrame The trap register frame.
734 * @param GCPhysFault The fault address as guest physical address.
735 * @param pvFault The fault address.
736 * @todo VBOXSTRICTRC
737 */
738static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
739 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
740{
741 NOREF(pVM); NOREF(GCPhysFault);
742
743 /*
744 * First, do the flushing.
745 */
746 pgmPoolMonitorChainFlush(pPool, pPage);
747
748 /*
749 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
750 * Must do this in raw mode (!); XP boot will fail otherwise.
751 */
752 int rc = VINF_SUCCESS;
753 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
754 if (rc2 == VINF_SUCCESS)
755 { /* do nothing */ }
756 else if (rc2 == VINF_EM_RESCHEDULE)
757 {
758 rc = VBOXSTRICTRC_VAL(rc2);
759# ifndef IN_RING3
760 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
761# endif
762 }
763 else if (rc2 == VERR_EM_INTERPRETER)
764 {
765 rc = VINF_EM_RAW_EMULATE_INSTR;
766 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
767 }
768 else if (RT_FAILURE_NP(rc2))
769 rc = VBOXSTRICTRC_VAL(rc2);
770 else
771 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
772
773 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
774 return rc;
775}
776
777
778/**
779 * Handles the STOSD write accesses.
780 *
781 * @returns VBox status code suitable for scheduling.
782 * @param pVM The cross context VM structure.
783 * @param pPool The pool.
784 * @param pPage The pool page (head).
785 * @param pDis The disassembly of the write instruction.
786 * @param pRegFrame The trap register frame.
787 * @param GCPhysFault The fault address as guest physical address.
788 * @param pvFault The fault address.
789 */
790DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
791 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
792{
793 unsigned uIncrement = pDis->Param1.cb;
794 NOREF(pVM);
795
796 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
797 Assert(pRegFrame->rcx <= 0x20);
798
799# ifdef VBOX_STRICT
800 if (pDis->uOpMode == DISCPUMODE_32BIT)
801 Assert(uIncrement == 4);
802 else
803 Assert(uIncrement == 8);
804# endif
805
806 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
807
808 /*
809 * Increment the modification counter and insert it into the list
810 * of modified pages the first time.
811 */
812 if (!pPage->cModifications++)
813 pgmPoolMonitorModifiedInsert(pPool, pPage);
814
815 /*
816 * Execute REP STOSD.
817 *
818 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
819 * write situation, meaning that it's safe to write here.
820 */
821 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
822 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
823 while (pRegFrame->rcx)
824 {
825 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
826 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
827 pu32 += uIncrement;
828 GCPhysFault += uIncrement;
829 pRegFrame->rdi += uIncrement;
830 pRegFrame->rcx--;
831 }
832 pRegFrame->rip += pDis->cbInstr;
833
834 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
835 return VINF_SUCCESS;
836}
837
838
839/**
840 * Handles the simple write accesses.
841 *
842 * @returns VBox status code suitable for scheduling.
843 * @param pVM The cross context VM structure.
844 * @param pVCpu The cross context virtual CPU structure.
845 * @param pPool The pool.
846 * @param pPage The pool page (head).
847 * @param pDis The disassembly of the write instruction.
848 * @param pRegFrame The trap register frame.
849 * @param GCPhysFault The fault address as guest physical address.
850 * @param pvFault The fault address.
851 * @param pfReused Reused state (in/out)
852 */
853DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
854 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
855{
856 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
857 NOREF(pVM);
858 NOREF(pfReused); /* initialized by caller */
859
860 /*
861 * Increment the modification counter and insert it into the list
862 * of modified pages the first time.
863 */
864 if (!pPage->cModifications++)
865 pgmPoolMonitorModifiedInsert(pPool, pPage);
866
867 /*
868 * Clear the affected shadow page table entries. ASSUMES that pvFault is readable.
869 */
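/* Writes larger than 8 bytes are fed to pgmPoolMonitorChainChanging in
   chunks of at most 8 bytes each. */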
870 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
871 if (cbWrite <= 8)
872 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
873 else if (cbWrite <= 16)
874 {
875 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
876 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
877 }
878 else
879 {
880 Assert(cbWrite <= 32);
881 for (uint32_t off = 0; off < cbWrite; off += 8)
882 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
883 }
884
885 /*
886 * Interpret the instruction.
887 */
888 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
889 if (RT_SUCCESS(rc))
890 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
891 else if (rc == VERR_EM_INTERPRETER)
892 {
893 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
894 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
895 rc = VINF_EM_RAW_EMULATE_INSTR;
896 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
897 }
898
899# if 0 /* experimental code */
900 if (rc == VINF_SUCCESS)
901 {
902 switch (pPage->enmKind)
903 {
904 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
905 {
906 X86PTEPAE GstPte;
907 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
908 AssertRC(rc);
909
910 /* Check the new value written by the guest. If present and with a bogus physical address, then
911 * it's fairly safe to assume the guest is reusing the PT.
912 */
913 if (GstPte.n.u1Present)
914 {
915 RTHCPHYS HCPhys = -1;
916 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
917 if (rc != VINF_SUCCESS)
918 {
919 *pfReused = true;
920 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
921 }
922 }
923 break;
924 }
925 }
926 }
927# endif
928
929 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
930 return VBOXSTRICTRC_VAL(rc);
931}
932
933
934/**
935 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
936 * \#PF access handler callback for page table pages.}
937 *
938 * @remarks The @a uUser argument is the index of the PGMPOOLPAGE.
939 */
940DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
941 RTGCPTR pvFault, RTGCPHYS GCPhysFault, uint64_t uUser)
942{
943 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
944 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
945 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
946 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
947 unsigned cMaxModifications;
948 bool fForcedFlush = false;
949 RT_NOREF_PV(uErrorCode);
950
951 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
952
953 PGM_LOCK_VOID(pVM);
954 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
955 {
956 /* Pool page changed while we were waiting for the lock; ignore. */
957 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
958 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
959 PGM_UNLOCK(pVM);
960 return VINF_SUCCESS;
961 }
962# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
963 if (pPage->fDirty)
964 {
965 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
966 PGM_UNLOCK(pVM);
967 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
968 }
969# endif
970
971# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
972 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
973 {
974 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
975 void *pvGst;
976 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
977 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
978 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
979 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
980 }
981# endif
982
983 /*
984 * Disassemble the faulting instruction.
985 */
986 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
987 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
988 if (RT_UNLIKELY(rc != VINF_SUCCESS))
989 {
990 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
991 PGM_UNLOCK(pVM);
992 return rc;
993 }
994
995 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
996
997 /*
998 * We should ALWAYS have the list head as user parameter. This
999 * is because we use that page to record the changes.
1000 */
1001 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1002
1003# ifdef IN_RING0
1004 /* Maximum nr of modifications depends on the page type. */
1005 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1006 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1007 cMaxModifications = 4;
1008 else
1009 cMaxModifications = 24;
1010# else
1011 cMaxModifications = 48;
1012# endif
1013
1014 /*
1015 * Incremental page table updates should weigh more than random ones.
1016 * (Only applies when started from offset 0)
1017 */
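/* Heuristic: a fault at the next entry, from (nearly) the same RIP, on the
   very next monitored access is treated as part of a sequential update loop
   and doubles the page's modification count, making it a flush candidate sooner. */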
1018 pVCpu->pgm.s.cPoolAccessHandler++;
1019 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1020 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1021 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1022 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1023 {
1024 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1025 Assert(pPage->cModifications < 32000);
1026 pPage->cModifications = pPage->cModifications * 2;
1027 pPage->GCPtrLastAccessHandlerFault = pvFault;
1028 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1029 if (pPage->cModifications >= cMaxModifications)
1030 {
1031 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1032 fForcedFlush = true;
1033 }
1034 }
1035
1036 if (pPage->cModifications >= cMaxModifications)
1037 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1038
1039 /*
1040 * Check if it's worth dealing with.
1041 */
1042 bool fReused = false;
1043 bool fNotReusedNotForking = false;
1044 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1045 || pgmPoolIsPageLocked(pPage)
1046 )
1047 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1048 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1049 {
1050 /*
1051 * Simple instructions, no REP prefix.
1052 */
1053 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1054 {
1055 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1056 if (fReused)
1057 goto flushPage;
1058
1059 /* A mov instruction to change the first page table entry will be remembered so we can detect
1060 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1061 */
1062 if ( rc == VINF_SUCCESS
1063 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1064 && pDis->pCurInstr->uOpcode == OP_MOV
1065 && (pvFault & PAGE_OFFSET_MASK) == 0)
1066 {
1067 pPage->GCPtrLastAccessHandlerFault = pvFault;
1068 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1069 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1070 /* Make sure we don't kick out a page too quickly. */
1071 if (pPage->cModifications > 8)
1072 pPage->cModifications = 2;
1073 }
1074 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1075 {
1076 /* ignore the 2nd write to this page table entry. */
1077 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1078 }
1079 else
1080 {
1081 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1082 pPage->GCPtrLastAccessHandlerRip = 0;
1083 }
1084
1085 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1086 PGM_UNLOCK(pVM);
1087 return rc;
1088 }
1089
1090 /*
1091 * Windows is frequently doing small memset() operations (netio test 4k+).
1092 * We have to deal with these or we'll kill the cache and performance.
1093 */
1094 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1095 && !pRegFrame->eflags.Bits.u1DF
1096 && pDis->uOpMode == pDis->uCpuMode
1097 && pDis->uAddrMode == pDis->uCpuMode)
1098 {
1099 bool fValidStosd = false;
1100
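/* Only handle the REP STOS here when the whole store stays within the
   faulting guest page, is naturally aligned and uses one of the two observed
   fill values; otherwise it is treated like any other REP-prefixed access below. */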
1101 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1102 && pDis->fPrefix == DISPREFIX_REP
1103 && pRegFrame->ecx <= 0x20
1104 && pRegFrame->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1105 && !((uintptr_t)pvFault & 3)
1106 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1107 )
1108 {
1109 fValidStosd = true;
1110 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1111 }
1112 else
1113 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1114 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1115 && pRegFrame->rcx <= 0x20
1116 && pRegFrame->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1117 && !((uintptr_t)pvFault & 7)
1118 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1119 )
1120 {
1121 fValidStosd = true;
1122 }
1123
1124 if (fValidStosd)
1125 {
1126 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1127 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1128 PGM_UNLOCK(pVM);
1129 return rc;
1130 }
1131 }
1132
1133 /* REP prefix, don't bother. */
1134 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1135 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1136 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1137 fNotReusedNotForking = true;
1138 }
1139
1140# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1141 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1142 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1143 */
1144 if ( pPage->cModifications >= cMaxModifications
1145 && !fForcedFlush
1146 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1147 && ( fNotReusedNotForking
1148 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1149 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1150 )
1151 )
1152 {
1153 Assert(!pgmPoolIsPageLocked(pPage));
1154 Assert(pPage->fDirty == false);
1155
1156 /* Flush any monitored duplicates as we will disable write protection. */
1157 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1158 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1159 {
1160 PPGMPOOLPAGE pPageHead = pPage;
1161
1162 /* Find the monitor head. */
1163 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1164 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1165
1166 while (pPageHead)
1167 {
1168 unsigned idxNext = pPageHead->iMonitoredNext;
1169
1170 if (pPageHead != pPage)
1171 {
1172 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1173 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1174 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1175 AssertRC(rc2);
1176 }
1177
1178 if (idxNext == NIL_PGMPOOL_IDX)
1179 break;
1180
1181 pPageHead = &pPool->aPages[idxNext];
1182 }
1183 }
1184
1185 /* The flushing above might fail for locked pages, so double check. */
1186 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1187 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1188 {
1189 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1190
1191 /* Temporarily allow write access to the page table again. */
1192 rc = PGMHandlerPhysicalPageTempOff(pVM,
1193 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK,
1194 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1195 if (rc == VINF_SUCCESS)
1196 {
1197 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1198 AssertMsg(rc == VINF_SUCCESS
1199 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1200 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1201 || rc == VERR_PAGE_NOT_PRESENT,
1202 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1203# ifdef VBOX_STRICT
1204 pPage->GCPtrDirtyFault = pvFault;
1205# endif
1206
1207 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1208 PGM_UNLOCK(pVM);
1209 return rc;
1210 }
1211 }
1212 }
1213# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1214
1215 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1216flushPage:
1217 /*
1218 * Not worth it, so flush it.
1219 *
1220 * If we considered it to be reused, don't go back to ring-3
1221 * to emulate failed instructions since we usually cannot
1222 * interpret them. This may be a bit risky, in which case
1223 * the reuse detection must be fixed.
1224 */
1225 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1226 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1227 && fReused)
1228 {
1229 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1230 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1231 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1232 }
1233 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1234 PGM_UNLOCK(pVM);
1235 return rc;
1236}
1237
1238#endif /* !IN_RING3 */
1239
1240/**
1241 * @callback_method_impl{FNPGMPHYSHANDLER,
1242 * Access handler for shadowed page table pages.}
1243 *
1244 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1245 * @note The @a uUser argument is the index of the PGMPOOLPAGE.
1246 */
1247PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1248pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1249 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, uint64_t uUser)
1250{
1251 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1252 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1253 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1254 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1255 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1256 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1257
1258 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1259
1260 PGM_LOCK_VOID(pVM);
1261
1262#ifdef VBOX_WITH_STATISTICS
1263 /*
1264 * Collect stats on the access.
1265 */
1266 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
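/* 19 buckets: write sizes 1..16 map to buckets 0..15, while 17-31, 32-63
   and 64+ byte writes go into buckets 16, 17 and 18 respectively. */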
1267 if (cbBuf <= 16 && cbBuf > 0)
1268 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1269 else if (cbBuf >= 17 && cbBuf < 32)
1270 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1271 else if (cbBuf >= 32 && cbBuf < 64)
1272 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1273 else if (cbBuf >= 64)
1274 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1275
1276 uint8_t cbAlign;
1277 switch (pPage->enmKind)
1278 {
1279 default:
1280 cbAlign = 7;
1281 break;
1282 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1283 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1284 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1285 case PGMPOOLKIND_32BIT_PD:
1286 case PGMPOOLKIND_32BIT_PD_PHYS:
1287 cbAlign = 3;
1288 break;
1289 }
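/* cbAlign is really an alignment mask: 7 for page tables with 8-byte entries,
   3 for the 32-bit (4-byte entry) kinds. */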
1290 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1291 if ((uint8_t)GCPhys & cbAlign)
1292 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1293#endif
1294
1295 /*
1296 * Make sure the pool page wasn't modified by a different CPU.
1297 */
1298 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1299 {
1300 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1301
1302 /* The max modification count before flushing depends on the context and page type. */
1303#ifdef IN_RING3
1304 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1305#else
1306 uint16_t cMaxModifications;
1307 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1308 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1309 cMaxModifications = 4;
1310 else
1311 cMaxModifications = 24;
1312#endif
1313
1314 /*
1315 * We don't have to be very sophisticated about this since there are relatively few calls here.
1316 * However, we must try our best to detect any non-CPU accesses (disk / networking).
1317 */
1318 if ( ( pPage->cModifications < cMaxModifications
1319 || pgmPoolIsPageLocked(pPage) )
1320 && enmOrigin != PGMACCESSORIGIN_DEVICE
1321 && cbBuf <= 16)
1322 {
1323 /* Clear the shadow entry. */
1324 if (!pPage->cModifications++)
1325 pgmPoolMonitorModifiedInsert(pPool, pPage);
1326
1327 if (cbBuf <= 8)
1328 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1329 else
1330 {
1331 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1332 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1333 }
1334 }
1335 else
1336 pgmPoolMonitorChainFlush(pPool, pPage);
1337
1338 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1339 }
1340 else
1341 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1342 PGM_UNLOCK(pVM);
1343 return VINF_PGM_HANDLER_DO_DEFAULT;
1344}
1345
1346
1347#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1348
1349# if defined(VBOX_STRICT) && !defined(IN_RING3)
1350
1351/**
1352 * Check references to guest physical memory in a PAE / PAE page table.
1353 *
1354 * @param pPool The pool.
1355 * @param pPage The page.
1356 * @param pShwPT The shadow page table (mapping of the page).
1357 * @param pGstPT The guest page table.
1358 */
1359static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1360{
1361 unsigned cErrors = 0;
1362 int LastRc = -1; /* initialized to shut up gcc */
1363 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1364 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1365 PVMCC pVM = pPool->CTX_SUFF(pVM);
1366
1367# ifdef VBOX_STRICT
1368 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1369 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1370# endif
1371 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1372 {
1373 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1374 {
1375 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1376 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1377 if ( rc != VINF_SUCCESS
1378 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1379 {
1380 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1381 LastPTE = i;
1382 LastRc = rc;
1383 LastHCPhys = HCPhys;
1384 cErrors++;
1385
1386 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1387 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1388 AssertRC(rc);
1389
1390 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1391 {
1392 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1393
1394 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1395 {
1396 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1397
1398 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1399 {
1400 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1401 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1402 {
1403 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1404 }
1405 }
1406
1407 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1408 }
1409 }
1410 }
1411 }
1412 }
1413 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1414}
1415
1416
1417/**
1418 * Check references to guest physical memory in a PAE / 32-bit page table.
1419 *
1420 * @param pPool The pool.
1421 * @param pPage The page.
1422 * @param pShwPT The shadow page table (mapping of the page).
1423 * @param pGstPT The guest page table.
1424 */
1425static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1426{
1427 unsigned cErrors = 0;
1428 int LastRc = -1; /* initialized to shut up gcc */
1429 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1430 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1431 PVMCC pVM = pPool->CTX_SUFF(pVM);
1432
1433# ifdef VBOX_STRICT
1434 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1435 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1436# endif
1437 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1438 {
1439 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1440 {
1441 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1442 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1443 if ( rc != VINF_SUCCESS
1444 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1445 {
1446 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1447 LastPTE = i;
1448 LastRc = rc;
1449 LastHCPhys = HCPhys;
1450 cErrors++;
1451
1452 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1453 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1454 AssertRC(rc);
1455
1456 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1457 {
1458 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1459
1460 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1461 {
1462 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1463
1464 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1465 {
1466 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1467 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1468 {
1469 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1470 }
1471 }
1472
1473 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1474 }
1475 }
1476 }
1477 }
1478 }
1479 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1480}
1481
1482# endif /* VBOX_STRICT && !IN_RING3 */
1483
1484/**
1485 * Clear references to guest physical memory in a PAE / PAE page table.
1486 *
1487 * @returns nr of changed PTEs
1488 * @param pPool The pool.
1489 * @param pPage The page.
1490 * @param pShwPT The shadow page table (mapping of the page).
1491 * @param pGstPT The guest page table.
1492 * @param pOldGstPT The old cached guest page table.
1493 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1494 * @param pfFlush Flush reused page table (out)
1495 */
1496DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1497 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1498{
1499 unsigned cChanged = 0;
1500
1501# ifdef VBOX_STRICT
1502 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1503 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1504# endif
1505 *pfFlush = false;
1506
1507 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1508 {
1509 /* Check the new value written by the guest. If present and with a bogus physical address, then
1510 * it's fairly safe to assume the guest is reusing the PT.
1511 */
1512 if ( fAllowRemoval
1513 && (pGstPT->a[i].u & X86_PTE_P))
1514 {
1515 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1516 {
1517 *pfFlush = true;
1518 return ++cChanged;
1519 }
1520 }
1521 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1522 {
1523 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1524 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1525 {
1526# ifdef VBOX_STRICT
1527 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1528 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1529 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1530# endif
1531 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1532 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1533 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1534 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1535
1536 if ( uHostAttr == uGuestAttr
1537 && fHostRW <= fGuestRW)
1538 continue;
1539 }
1540 cChanged++;
1541 /* Something was changed, so flush it. */
1542 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1543 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1544 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1545 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1546 }
1547 }
1548 return cChanged;
1549}
1550
1551
1552/**
1553 * Clear references to guest physical memory in a PAE / PAE page table.
1554 *
1555 * @returns nr of changed PTEs
1556 * @param pPool The pool.
1557 * @param pPage The page.
1558 * @param pShwPT The shadow page table (mapping of the page).
1559 * @param pGstPT The guest page table.
1560 * @param pOldGstPT The old cached guest page table.
1561 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1562 * @param pfFlush Flush reused page table (out)
1563 */
1564DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1565 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1566{
1567 unsigned cChanged = 0;
1568
1569# ifdef VBOX_STRICT
1570 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1571 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1572# endif
1573 *pfFlush = false;
1574
1575 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1576 {
1577 /* Check the new value written by the guest. If present and with a bogus physical address, then
1578 * it's fairly safe to assume the guest is reusing the PT. */
1579 if (fAllowRemoval)
1580 {
1581 X86PGUINT const uPte = pGstPT->a[i].u;
1582 if ( (uPte & X86_PTE_P)
1583 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1584 {
1585 *pfFlush = true;
1586 return ++cChanged;
1587 }
1588 }
1589 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1590 {
1591 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1592 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1593 {
1594# ifdef VBOX_STRICT
 1595 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1596 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1597 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1598# endif
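 /* Same check as in the PAE-on-PAE variant: identical attributes, and the shadow PTE may
  * have RW stripped while the guest allows writes (write monitoring). */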
1599 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1600 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1601 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1602 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1603
1604 if ( uHostAttr == uGuestAttr
1605 && fHostRW <= fGuestRW)
1606 continue;
1607 }
1608 cChanged++;
1609 /* Something was changed, so flush it. */
1610 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1611 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1612 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1613 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1614 }
1615 }
1616 return cChanged;
1617}
1618
1619
1620/**
1621 * Flush a dirty page
1622 *
1623 * @param pVM The cross context VM structure.
1624 * @param pPool The pool.
1625 * @param idxSlot Dirty array slot index
1626 * @param fAllowRemoval Allow a reused page table to be removed
1627 */
1628static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1629{
1630 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1631
1632 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1633 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1634 if (idxPage == NIL_PGMPOOL_IDX)
1635 return;
1636
1637 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1638 Assert(pPage->idx == idxPage);
1639 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1640
1641 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1642 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1643
1644 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1645 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1646 Assert(rc == VINF_SUCCESS);
1647 pPage->fDirty = false;
1648
1649# ifdef VBOX_STRICT
1650 uint64_t fFlags = 0;
1651 RTHCPHYS HCPhys;
1652 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1653 AssertMsg( ( rc == VINF_SUCCESS
1654 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1655 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1656 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1657 || rc == VERR_PAGE_NOT_PRESENT,
1658 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1659# endif
1660
1661 /* Flush those PTEs that have changed. */
1662 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1663 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1664 void *pvGst;
1665 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1666 bool fFlush;
1667 unsigned cChanges;
1668
1669 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1670 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1671 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1672 else
1673 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1674 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1675
1676 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1677 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1678 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1679 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1680
1681 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1682 Assert(pPage->cModifications);
1683 if (cChanges < 4)
1684 pPage->cModifications = 1; /* must use > 0 here */
1685 else
1686 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1687
1688 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
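 /* If the dirty page array was full, the slot we just emptied becomes the next free slot. */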
1689 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1690 pPool->idxFreeDirtyPage = idxSlot;
1691
1692 pPool->cDirtyPages--;
1693 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1694 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1695 if (fFlush)
1696 {
1697 Assert(fAllowRemoval);
1698 Log(("Flush reused page table!\n"));
1699 pgmPoolFlushPage(pPool, pPage);
1700 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1701 }
1702 else
1703 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1704}
1705
1706
1707# ifndef IN_RING3
1708/**
1709 * Add a new dirty page
1710 *
1711 * @param pVM The cross context VM structure.
1712 * @param pPool The pool.
1713 * @param pPage The page.
1714 */
1715void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1716{
1717 PGM_LOCK_ASSERT_OWNER(pVM);
1718 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1719 Assert(!pPage->fDirty);
1720
1721 unsigned idxFree = pPool->idxFreeDirtyPage;
1722 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1723 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1724
1725 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1726 {
1727 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1728 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1729 }
1730 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1731 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1732
1733 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1734
1735 /*
1736 * Make a copy of the guest page table as we require valid GCPhys addresses
1737 * when removing references to physical pages.
1738 * (The HCPhys linear lookup is *extremely* expensive!)
1739 */
1740 void *pvGst;
1741 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
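 /* A PAE shadow PT maps 2 MB, i.e. only half of a 4 MB 32-bit guest PT, so for the
  * 32-bit case half a page of guest data is sufficient. */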
1742 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1743 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
1744# ifdef VBOX_STRICT
1745 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1746 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1747 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1748 else
1749 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1750 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1751# endif
1752 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1753
1754 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1755 pPage->fDirty = true;
1756 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1757 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1758 pPool->cDirtyPages++;
1759
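 /* Advance the free slot hint; if that slot is already taken and the array isn't full,
  * scan the ring for the next free one. */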
1760 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1761 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1762 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1763 {
1764 unsigned i;
1765 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1766 {
1767 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1768 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1769 {
1770 pPool->idxFreeDirtyPage = idxFree;
1771 break;
1772 }
1773 }
1774 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1775 }
1776
1777 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1778
1779 /*
1780 * Clear all references to this shadow table. See @bugref{7298}.
1781 */
1782 pgmPoolTrackClearPageUsers(pPool, pPage);
1783}
1784# endif /* !IN_RING3 */
1785
1786
1787/**
1788 * Check if the specified page is dirty (not write monitored)
1789 *
1790 * @return dirty or not
1791 * @param pVM The cross context VM structure.
1792 * @param GCPhys Guest physical address
1793 */
1794bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1795{
1796 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1797 PGM_LOCK_ASSERT_OWNER(pVM);
1798 if (!pPool->cDirtyPages)
1799 return false;
1800
1801 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1802
1803 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1804 {
1805 unsigned idxPage = pPool->aidxDirtyPages[i];
1806 if (idxPage != NIL_PGMPOOL_IDX)
1807 {
1808 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1809 if (pPage->GCPhys == GCPhys)
1810 return true;
1811 }
1812 }
1813 return false;
1814}
1815
1816
1817/**
1818 * Reset all dirty pages by reinstating page monitoring.
1819 *
1820 * @param pVM The cross context VM structure.
1821 */
1822void pgmPoolResetDirtyPages(PVMCC pVM)
1823{
1824 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1825 PGM_LOCK_ASSERT_OWNER(pVM);
1826 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1827
1828 if (!pPool->cDirtyPages)
1829 return;
1830
1831 Log(("pgmPoolResetDirtyPages\n"));
1832 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1833 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1834
1835 pPool->idxFreeDirtyPage = 0;
1836 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1837 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1838 {
1839 unsigned i;
1840 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1841 {
1842 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1843 {
1844 pPool->idxFreeDirtyPage = i;
1845 break;
1846 }
1847 }
1848 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1849 }
1850
1851 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1852 return;
1853}
1854
1855
1856/**
1857 * Invalidate the PT entry for the specified page
1858 *
1859 * @param pVM The cross context VM structure.
1860 * @param GCPtrPage Guest page to invalidate
1861 */
1862void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
1863{
1864 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1865 PGM_LOCK_ASSERT_OWNER(pVM);
1866 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1867
1868 if (!pPool->cDirtyPages)
1869 return;
1870
1871 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
1872 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1873 {
1874 /** @todo What was intended here??? This looks incomplete... */
1875 }
1876}
1877
1878
1879/**
 1880 * Flush the dirty page whose page table matches the given physical address, reinstating its page monitoring.
1881 *
1882 * @param pVM The cross context VM structure.
1883 * @param GCPhysPT Physical address of the page table
1884 */
1885void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
1886{
1887 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1888 PGM_LOCK_ASSERT_OWNER(pVM);
1889 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1890 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1891
1892 if (!pPool->cDirtyPages)
1893 return;
1894
1895 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1896
1897 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1898 {
1899 unsigned idxPage = pPool->aidxDirtyPages[i];
1900 if (idxPage != NIL_PGMPOOL_IDX)
1901 {
1902 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1903 if (pPage->GCPhys == GCPhysPT)
1904 {
1905 idxDirtyPage = i;
1906 break;
1907 }
1908 }
1909 }
1910
1911 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1912 {
1913 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1914 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1915 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1916 {
1917 unsigned i;
1918 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1919 {
1920 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1921 {
1922 pPool->idxFreeDirtyPage = i;
1923 break;
1924 }
1925 }
1926 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1927 }
1928 }
1929}
1930
1931#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1932
1933/**
1934 * Inserts a page into the GCPhys hash table.
1935 *
1936 * @param pPool The pool.
1937 * @param pPage The page.
1938 */
1939DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1940{
1941 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1942 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1943 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1944 pPage->iNext = pPool->aiHash[iHash];
1945 pPool->aiHash[iHash] = pPage->idx;
1946}
1947
1948
1949/**
1950 * Removes a page from the GCPhys hash table.
1951 *
1952 * @param pPool The pool.
1953 * @param pPage The page.
1954 */
1955DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1956{
1957 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1958 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1959 if (pPool->aiHash[iHash] == pPage->idx)
1960 pPool->aiHash[iHash] = pPage->iNext;
1961 else
1962 {
1963 uint16_t iPrev = pPool->aiHash[iHash];
1964 for (;;)
1965 {
1966 const int16_t i = pPool->aPages[iPrev].iNext;
1967 if (i == pPage->idx)
1968 {
1969 pPool->aPages[iPrev].iNext = pPage->iNext;
1970 break;
1971 }
1972 if (i == NIL_PGMPOOL_IDX)
1973 {
1974 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1975 break;
1976 }
1977 iPrev = i;
1978 }
1979 }
1980 pPage->iNext = NIL_PGMPOOL_IDX;
1981}
1982
1983
1984/**
1985 * Frees up one cache page.
1986 *
1987 * @returns VBox status code.
1988 * @retval VINF_SUCCESS on success.
1989 * @param pPool The pool.
1990 * @param iUser The user index.
1991 */
1992static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1993{
1994 const PVMCC pVM = pPool->CTX_SUFF(pVM);
1995 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
1996 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1997
1998 /*
1999 * Select one page from the tail of the age list.
2000 */
2001 PPGMPOOLPAGE pPage;
2002 for (unsigned iLoop = 0; ; iLoop++)
2003 {
2004 uint16_t iToFree = pPool->iAgeTail;
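 /* Skip the caller's own user page (iUser); we must not flush the table we are about
  * to add a user reference to. */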
2005 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2006 iToFree = pPool->aPages[iToFree].iAgePrev;
2007/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2008 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2009 {
2010 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2011 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2012 {
2013 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2014 continue;
2015 iToFree = i;
2016 break;
2017 }
2018 }
2019*/
2020 Assert(iToFree != iUser);
2021 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2022 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2023 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2024 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2025 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2026 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2027
2028 pPage = &pPool->aPages[iToFree];
2029
2030 /*
2031 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2032 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2033 */
2034 if ( !pgmPoolIsPageLocked(pPage)
2035 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2036 break;
2037 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2038 pgmPoolCacheUsed(pPool, pPage);
2039 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2040 }
2041
2042 /*
2043 * Found a usable page, flush it and return.
2044 */
2045 int rc = pgmPoolFlushPage(pPool, pPage);
2046 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2047 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2048 if (rc == VINF_SUCCESS)
2049 PGM_INVL_ALL_VCPU_TLBS(pVM);
2050 return rc;
2051}
2052
2053
2054/**
2055 * Checks if a kind mismatch is really a page being reused
 2056 * or if it's just a normal remapping.
2057 *
2058 * @returns true if reused and the cached page (enmKind1) should be flushed
2059 * @returns false if not reused.
2060 * @param enmKind1 The kind of the cached page.
2061 * @param enmKind2 The kind of the requested page.
2062 */
2063static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2064{
2065 switch (enmKind1)
2066 {
2067 /*
2068 * Never reuse them. There is no remapping in non-paging mode.
2069 */
2070 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2071 case PGMPOOLKIND_32BIT_PD_PHYS:
2072 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2073 case PGMPOOLKIND_PAE_PD_PHYS:
2074 case PGMPOOLKIND_PAE_PDPT_PHYS:
2075 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2076 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2077 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2078 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2079 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2080 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2081 return false;
2082
2083 /*
2084 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2085 */
2086 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2087 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2088 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2089 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2090 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2091 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2092 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2093 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2094 case PGMPOOLKIND_32BIT_PD:
2095 case PGMPOOLKIND_PAE_PDPT:
2096 switch (enmKind2)
2097 {
2098 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2099 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2100 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2101 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2102 case PGMPOOLKIND_64BIT_PML4:
2103 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2104 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2105 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2106 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2107 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2108 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2109 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2110 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2111 return true;
2112 default:
2113 return false;
2114 }
2115
2116 /*
2117 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2118 */
2119 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2120 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2121 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2122 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2123 case PGMPOOLKIND_64BIT_PML4:
2124 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2125 switch (enmKind2)
2126 {
2127 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2128 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2129 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2130 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2131 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2132 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2133 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2134 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2135 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2136 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2137 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2138 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2139 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2140 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2141 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2142 return true;
2143 default:
2144 return false;
2145 }
2146
2147 /*
2148 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2149 */
2150 case PGMPOOLKIND_ROOT_NESTED:
2151 return false;
2152
2153 default:
2154 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2155 }
2156}
2157
2158
2159/**
2160 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2161 *
2162 * @returns VBox status code.
2163 * @retval VINF_PGM_CACHED_PAGE on success.
2164 * @retval VERR_FILE_NOT_FOUND if not found.
2165 * @param pPool The pool.
2166 * @param GCPhys The GC physical address of the page we're gonna shadow.
2167 * @param enmKind The kind of mapping.
2168 * @param enmAccess Access type for the mapping (only relevant for big pages)
2169 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2170 * @param iUser The shadow page pool index of the user table. This is
2171 * NIL_PGMPOOL_IDX for root pages.
2172 * @param iUserTable The index into the user table (shadowed). Ignored if
2173 * root page
2174 * @param ppPage Where to store the pointer to the page.
2175 */
2176static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2177 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2178{
2179 /*
2180 * Look up the GCPhys in the hash.
2181 */
2182 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2183 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2184 if (i != NIL_PGMPOOL_IDX)
2185 {
2186 do
2187 {
2188 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2189 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2190 if (pPage->GCPhys == GCPhys)
2191 {
2192 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2193 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2194 && pPage->fA20Enabled == fA20Enabled)
2195 {
2196 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2197 * doesn't flush it in case there are no more free use records.
2198 */
2199 pgmPoolCacheUsed(pPool, pPage);
2200
2201 int rc = VINF_SUCCESS;
2202 if (iUser != NIL_PGMPOOL_IDX)
2203 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2204 if (RT_SUCCESS(rc))
2205 {
2206 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2207 *ppPage = pPage;
2208 if (pPage->cModifications)
2209 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2210 STAM_COUNTER_INC(&pPool->StatCacheHits);
2211 return VINF_PGM_CACHED_PAGE;
2212 }
2213 return rc;
2214 }
2215
2216 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2217 {
2218 /*
2219 * The kind is different. In some cases we should now flush the page
2220 * as it has been reused, but in most cases this is normal remapping
2221 * of PDs as PT or big pages using the GCPhys field in a slightly
2222 * different way than the other kinds.
2223 */
2224 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2225 {
2226 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2227 pgmPoolFlushPage(pPool, pPage);
2228 break;
2229 }
2230 }
2231 }
2232
2233 /* next */
2234 i = pPage->iNext;
2235 } while (i != NIL_PGMPOOL_IDX);
2236 }
2237
2238 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2239 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2240 return VERR_FILE_NOT_FOUND;
2241}
2242
2243
2244/**
2245 * Inserts a page into the cache.
2246 *
2247 * @param pPool The pool.
2248 * @param pPage The cached page.
2249 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2250 */
2251static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2252{
2253 /*
2254 * Insert into the GCPhys hash if the page is fit for that.
2255 */
2256 Assert(!pPage->fCached);
2257 if (fCanBeCached)
2258 {
2259 pPage->fCached = true;
2260 pgmPoolHashInsert(pPool, pPage);
2261 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2262 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2263 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2264 }
2265 else
2266 {
2267 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2268 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2269 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2270 }
2271
2272 /*
2273 * Insert at the head of the age list.
2274 */
2275 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2276 pPage->iAgeNext = pPool->iAgeHead;
2277 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2278 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2279 else
2280 pPool->iAgeTail = pPage->idx;
2281 pPool->iAgeHead = pPage->idx;
2282}
2283
2284
2285/**
2286 * Flushes a cached page.
2287 *
2288 * @param pPool The pool.
2289 * @param pPage The cached page.
2290 */
2291static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2292{
2293 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2294
2295 /*
2296 * Remove the page from the hash.
2297 */
2298 if (pPage->fCached)
2299 {
2300 pPage->fCached = false;
2301 pgmPoolHashRemove(pPool, pPage);
2302 }
2303 else
2304 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2305
2306 /*
2307 * Remove it from the age list.
2308 */
2309 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2310 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2311 else
2312 pPool->iAgeTail = pPage->iAgePrev;
2313 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2314 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2315 else
2316 pPool->iAgeHead = pPage->iAgeNext;
2317 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2318 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2319}
2320
2321
2322/**
2323 * Looks for pages sharing the monitor.
2324 *
2325 * @returns Pointer to the head page.
2326 * @returns NULL if not found.
2327 * @param pPool The Pool
2328 * @param pNewPage The page which is going to be monitored.
2329 */
2330static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2331{
2332 /*
2333 * Look up the GCPhys in the hash.
2334 */
2335 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2336 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2337 if (i == NIL_PGMPOOL_IDX)
2338 return NULL;
2339 do
2340 {
2341 PPGMPOOLPAGE pPage = &pPool->aPages[i];
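 /* Unsigned subtraction: matches any pool page whose GCPhys lies within the same
  * guest page as the page about to be monitored. */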
2342 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2343 && pPage != pNewPage)
2344 {
2345 switch (pPage->enmKind)
2346 {
2347 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2348 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2349 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2350 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2351 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2352 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2353 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2354 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2355 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2356 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2357 case PGMPOOLKIND_64BIT_PML4:
2358 case PGMPOOLKIND_32BIT_PD:
2359 case PGMPOOLKIND_PAE_PDPT:
2360 {
2361 /* find the head */
2362 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2363 {
2364 Assert(pPage->iMonitoredPrev != pPage->idx);
2365 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2366 }
2367 return pPage;
2368 }
2369
2370 /* ignore, no monitoring. */
2371 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2372 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2373 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2374 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2375 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2376 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2377 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2378 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2379 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2380 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2381 case PGMPOOLKIND_ROOT_NESTED:
2382 case PGMPOOLKIND_PAE_PD_PHYS:
2383 case PGMPOOLKIND_PAE_PDPT_PHYS:
2384 case PGMPOOLKIND_32BIT_PD_PHYS:
2385 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2386 break;
2387 default:
2388 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2389 }
2390 }
2391
2392 /* next */
2393 i = pPage->iNext;
2394 } while (i != NIL_PGMPOOL_IDX);
2395 return NULL;
2396}
2397
2398
2399/**
 2400 * Enables write monitoring of a guest page.
2401 *
2402 * @returns VBox status code.
2403 * @retval VINF_SUCCESS on success.
2404 * @param pPool The pool.
2405 * @param pPage The cached page.
2406 */
2407static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2408{
2409 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2410
2411 /*
2412 * Filter out the relevant kinds.
2413 */
2414 switch (pPage->enmKind)
2415 {
2416 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2417 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2418 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2419 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2420 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2421 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2422 case PGMPOOLKIND_64BIT_PML4:
2423 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2424 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2425 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2426 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2427 case PGMPOOLKIND_32BIT_PD:
2428 case PGMPOOLKIND_PAE_PDPT:
2429 break;
2430
2431 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2432 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2433 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2434 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2435 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2436 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2437 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2438 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2439 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2440 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2441 case PGMPOOLKIND_ROOT_NESTED:
2442 /* Nothing to monitor here. */
2443 return VINF_SUCCESS;
2444
2445 case PGMPOOLKIND_32BIT_PD_PHYS:
2446 case PGMPOOLKIND_PAE_PDPT_PHYS:
2447 case PGMPOOLKIND_PAE_PD_PHYS:
2448 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2449 /* Nothing to monitor here. */
2450 return VINF_SUCCESS;
2451 default:
2452 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2453 }
2454
2455 /*
2456 * Install handler.
2457 */
2458 int rc;
2459 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2460 if (pPageHead)
2461 {
2462 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2463 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2464
2465#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2466 if (pPageHead->fDirty)
2467 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2468#endif
2469
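 /* Link the new page in right after the chain head; the head page keeps ownership of
  * the physical access handler. */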
2470 pPage->iMonitoredPrev = pPageHead->idx;
2471 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2472 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2473 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2474 pPageHead->iMonitoredNext = pPage->idx;
2475 rc = VINF_SUCCESS;
2476 }
2477 else
2478 {
2479 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2480 PVMCC pVM = pPool->CTX_SUFF(pVM);
2481 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2482 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2483 pPage - &pPool->aPages[0], NIL_RTR3PTR /*pszDesc*/);
2484 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2485 * the heap size should suffice. */
2486 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2487 PVMCPU pVCpu = VMMGetCpu(pVM);
2488 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2489 }
2490 pPage->fMonitored = true;
2491 return rc;
2492}
2493
2494
2495/**
2496 * Disables write monitoring of a guest page.
2497 *
2498 * @returns VBox status code.
2499 * @retval VINF_SUCCESS on success.
2500 * @param pPool The pool.
2501 * @param pPage The cached page.
2502 */
2503static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2504{
2505 /*
2506 * Filter out the relevant kinds.
2507 */
2508 switch (pPage->enmKind)
2509 {
2510 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2511 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2512 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2513 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2514 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2515 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2516 case PGMPOOLKIND_64BIT_PML4:
2517 case PGMPOOLKIND_32BIT_PD:
2518 case PGMPOOLKIND_PAE_PDPT:
2519 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2520 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2521 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2522 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2523 break;
2524
2525 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2526 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2527 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2528 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2529 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2530 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2531 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2532 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2533 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2534 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2535 case PGMPOOLKIND_ROOT_NESTED:
2536 case PGMPOOLKIND_PAE_PD_PHYS:
2537 case PGMPOOLKIND_PAE_PDPT_PHYS:
2538 case PGMPOOLKIND_32BIT_PD_PHYS:
2539 /* Nothing to monitor here. */
2540 Assert(!pPage->fMonitored);
2541 return VINF_SUCCESS;
2542
2543 default:
2544 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2545 }
2546 Assert(pPage->fMonitored);
2547
2548 /*
2549 * Remove the page from the monitored list or uninstall it if last.
2550 */
2551 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2552 int rc;
2553 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2554 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2555 {
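 /* Other pages share this monitor. If we're the chain head, hand the physical handler
  * over to the next page by updating its user argument; otherwise just unlink ourselves. */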
2556 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2557 {
2558 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2559 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2560 rc = PGMHandlerPhysicalChangeUserArg(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, pPage->iMonitoredNext);
2561
2562 AssertFatalRCSuccess(rc);
2563 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2564 }
2565 else
2566 {
2567 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2568 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2569 {
2570 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2571 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2572 }
2573 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2574 rc = VINF_SUCCESS;
2575 }
2576 }
2577 else
2578 {
2579 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2580 AssertFatalRC(rc);
2581 PVMCPU pVCpu = VMMGetCpu(pVM);
2582 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2583 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2584 }
2585 pPage->fMonitored = false;
2586
2587 /*
2588 * Remove it from the list of modified pages (if in it).
2589 */
2590 pgmPoolMonitorModifiedRemove(pPool, pPage);
2591
2592 return rc;
2593}
2594
2595
2596/**
2597 * Inserts the page into the list of modified pages.
2598 *
2599 * @param pPool The pool.
2600 * @param pPage The page.
2601 */
2602void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2603{
2604 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2605 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2606 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2607 && pPool->iModifiedHead != pPage->idx,
2608 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2609 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2610 pPool->iModifiedHead, pPool->cModifiedPages));
2611
2612 pPage->iModifiedNext = pPool->iModifiedHead;
2613 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2614 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2615 pPool->iModifiedHead = pPage->idx;
2616 pPool->cModifiedPages++;
2617#ifdef VBOX_WITH_STATISTICS
2618 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2619 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2620#endif
2621}
2622
2623
2624/**
2625 * Removes the page from the list of modified pages and resets the
2626 * modification counter.
2627 *
2628 * @param pPool The pool.
2629 * @param pPage The page which is believed to be in the list of modified pages.
2630 */
2631static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2632{
2633 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2634 if (pPool->iModifiedHead == pPage->idx)
2635 {
2636 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2637 pPool->iModifiedHead = pPage->iModifiedNext;
2638 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2639 {
2640 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2641 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2642 }
2643 pPool->cModifiedPages--;
2644 }
2645 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2646 {
2647 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2648 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2649 {
2650 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2651 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2652 }
2653 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2654 pPool->cModifiedPages--;
2655 }
2656 else
2657 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2658 pPage->cModifications = 0;
2659}
2660
2661
2662/**
2663 * Zaps the list of modified pages, resetting their modification counters in the process.
2664 *
2665 * @param pVM The cross context VM structure.
2666 */
2667static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2668{
2669 PGM_LOCK_VOID(pVM);
2670 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2671 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2672
2673 unsigned cPages = 0; NOREF(cPages);
2674
2675#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2676 pgmPoolResetDirtyPages(pVM);
2677#endif
2678
2679 uint16_t idx = pPool->iModifiedHead;
2680 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2681 while (idx != NIL_PGMPOOL_IDX)
2682 {
2683 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2684 idx = pPage->iModifiedNext;
2685 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2686 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2687 pPage->cModifications = 0;
2688 Assert(++cPages);
2689 }
2690 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2691 pPool->cModifiedPages = 0;
2692 PGM_UNLOCK(pVM);
2693}
2694
2695
2696/**
2697 * Handle SyncCR3 pool tasks
2698 *
2699 * @returns VBox status code.
 2700 * @retval VINF_SUCCESS on success.
 2701 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2702 * @param pVCpu The cross context virtual CPU structure.
2703 * @remark Should only be used when monitoring is available, thus placed in
2704 * the PGMPOOL_WITH_MONITORING \#ifdef.
2705 */
2706int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2707{
2708 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2709 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2710
2711 /*
2712 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2713 * Occasionally we will have to clear all the shadow page tables because we wanted
2714 * to monitor a page which was mapped by too many shadowed page tables. This operation
2715 * sometimes referred to as a 'lightweight flush'.
2716 */
2717# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2718 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2719 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2720# else /* !IN_RING3 */
2721 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2722 {
2723 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2724 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2725
2726 /* Make sure all other VCPUs return to ring 3. */
2727 if (pVM->cCpus > 1)
2728 {
2729 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2730 PGM_INVL_ALL_VCPU_TLBS(pVM);
2731 }
2732 return VINF_PGM_SYNC_CR3;
2733 }
2734# endif /* !IN_RING3 */
2735 else
2736 {
2737 pgmPoolMonitorModifiedClearAll(pVM);
2738
2739 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2740 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2741 {
2742 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2743 return pgmPoolSyncCR3(pVCpu);
2744 }
2745 }
2746 return VINF_SUCCESS;
2747}
2748
2749
2750/**
2751 * Frees up at least one user entry.
2752 *
2753 * @returns VBox status code.
 2754 * @retval VINF_SUCCESS if at least one user entry was freed.
2755 *
2756 * @param pPool The pool.
2757 * @param iUser The user index.
2758 */
2759static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2760{
2761 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2762 /*
2763 * Just free cached pages in a braindead fashion.
2764 */
2765 /** @todo walk the age list backwards and free the first with usage. */
2766 int rc = VINF_SUCCESS;
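 /* Each cached page we flush releases its user records back to the free list; keep
  * flushing until at least one record is available. */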
2767 do
2768 {
2769 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2770 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2771 rc = rc2;
2772 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2773 return rc;
2774}
2775
2776
2777/**
2778 * Inserts a page into the cache.
2779 *
2780 * This will create user node for the page, insert it into the GCPhys
2781 * hash, and insert it into the age list.
2782 *
2783 * @returns VBox status code.
2784 * @retval VINF_SUCCESS if successfully added.
2785 *
2786 * @param pPool The pool.
2787 * @param pPage The cached page.
2788 * @param GCPhys The GC physical address of the page we're gonna shadow.
2789 * @param iUser The user index.
2790 * @param iUserTable The user table index.
2791 */
2792DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2793{
2794 int rc = VINF_SUCCESS;
2795 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2796
2797 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2798
2799 if (iUser != NIL_PGMPOOL_IDX)
2800 {
2801#ifdef VBOX_STRICT
2802 /*
 2803 * Check that the entry doesn't already exist.
2804 */
2805 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2806 {
2807 uint16_t i = pPage->iUserHead;
2808 do
2809 {
2810 Assert(i < pPool->cMaxUsers);
2811 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2812 i = paUsers[i].iNext;
2813 } while (i != NIL_PGMPOOL_USER_INDEX);
2814 }
2815#endif
2816
2817 /*
 2818 * Find a free user node.
2819 */
2820 uint16_t i = pPool->iUserFreeHead;
2821 if (i == NIL_PGMPOOL_USER_INDEX)
2822 {
2823 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2824 if (RT_FAILURE(rc))
2825 return rc;
2826 i = pPool->iUserFreeHead;
2827 }
2828
2829 /*
2830 * Unlink the user node from the free list,
2831 * initialize and insert it into the user list.
2832 */
2833 pPool->iUserFreeHead = paUsers[i].iNext;
2834 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2835 paUsers[i].iUser = iUser;
2836 paUsers[i].iUserTable = iUserTable;
2837 pPage->iUserHead = i;
2838 }
2839 else
2840 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2841
2842
2843 /*
2844 * Insert into cache and enable monitoring of the guest page if enabled.
2845 *
2846 * Until we implement caching of all levels, including the CR3 one, we'll
 2847 * have to make sure we don't try to monitor & cache any recursive reuse of
 2848 * a monitored CR3 page. Because all Windows versions do this, we'll
2849 * have to be able to do combined access monitoring, CR3 + PT and
2850 * PD + PT (guest PAE).
2851 *
2852 * Update:
2853 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2854 */
2855 const bool fCanBeMonitored = true;
2856 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2857 if (fCanBeMonitored)
2858 {
2859 rc = pgmPoolMonitorInsert(pPool, pPage);
2860 AssertRC(rc);
2861 }
2862 return rc;
2863}
2864
2865
2866/**
2867 * Adds a user reference to a page.
2868 *
2869 * This will move the page to the head of the
2870 *
2871 * @returns VBox status code.
2872 * @retval VINF_SUCCESS if successfully added.
2873 *
2874 * @param pPool The pool.
2875 * @param pPage The cached page.
2876 * @param iUser The user index.
2877 * @param iUserTable The user table.
2878 */
2879static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2880{
2881 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2882 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2883 Assert(iUser != NIL_PGMPOOL_IDX);
2884
2885# ifdef VBOX_STRICT
2886 /*
 2887 * Check that the entry doesn't already exist. We only allow multiple
2888 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2889 */
2890 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2891 {
2892 uint16_t i = pPage->iUserHead;
2893 do
2894 {
2895 Assert(i < pPool->cMaxUsers);
2896 /** @todo this assertion looks odd... Shouldn't it be && here? */
2897 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2898 i = paUsers[i].iNext;
2899 } while (i != NIL_PGMPOOL_USER_INDEX);
2900 }
2901# endif
2902
2903 /*
2904 * Allocate a user node.
2905 */
2906 uint16_t i = pPool->iUserFreeHead;
2907 if (i == NIL_PGMPOOL_USER_INDEX)
2908 {
2909 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2910 if (RT_FAILURE(rc))
2911 return rc;
2912 i = pPool->iUserFreeHead;
2913 }
2914 pPool->iUserFreeHead = paUsers[i].iNext;
2915
2916 /*
2917 * Initialize the user node and insert it.
2918 */
2919 paUsers[i].iNext = pPage->iUserHead;
2920 paUsers[i].iUser = iUser;
2921 paUsers[i].iUserTable = iUserTable;
2922 pPage->iUserHead = i;
2923
2924# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2925 if (pPage->fDirty)
2926 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2927# endif
2928
2929 /*
2930 * Tell the cache to update its replacement stats for this page.
2931 */
2932 pgmPoolCacheUsed(pPool, pPage);
2933 return VINF_SUCCESS;
2934}
2935
2936
2937/**
2938 * Frees a user record associated with a page.
2939 *
 2940 * This does not clear the entry in the user table, it simply returns the
2941 * user record to the chain of free records.
2942 *
2943 * @param pPool The pool.
2944 * @param pPage The shadow page.
2945 * @param iUser The shadow page pool index of the user table.
2946 * @param iUserTable The index into the user table (shadowed).
2947 *
2948 * @remarks Don't call this for root pages.
2949 */
2950static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2951{
2952 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2953 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2954 Assert(iUser != NIL_PGMPOOL_IDX);
2955
2956 /*
2957 * Unlink and free the specified user entry.
2958 */
2959
2960 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2961 uint16_t i = pPage->iUserHead;
2962 if ( i != NIL_PGMPOOL_USER_INDEX
2963 && paUsers[i].iUser == iUser
2964 && paUsers[i].iUserTable == iUserTable)
2965 {
2966 pPage->iUserHead = paUsers[i].iNext;
2967
2968 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2969 paUsers[i].iNext = pPool->iUserFreeHead;
2970 pPool->iUserFreeHead = i;
2971 return;
2972 }
2973
2974 /* General: Linear search. */
2975 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2976 while (i != NIL_PGMPOOL_USER_INDEX)
2977 {
2978 if ( paUsers[i].iUser == iUser
2979 && paUsers[i].iUserTable == iUserTable)
2980 {
2981 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2982 paUsers[iPrev].iNext = paUsers[i].iNext;
2983 else
2984 pPage->iUserHead = paUsers[i].iNext;
2985
2986 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2987 paUsers[i].iNext = pPool->iUserFreeHead;
2988 pPool->iUserFreeHead = i;
2989 return;
2990 }
2991 iPrev = i;
2992 i = paUsers[i].iNext;
2993 }
2994
2995 /* Fatal: didn't find it */
2996 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
2997 iUser, iUserTable, pPage->GCPhys));
2998}
2999
3000
3001#if 0 /* unused */
3002/**
3003 * Gets the entry size of a shadow table.
3004 *
3005 * @param enmKind The kind of page.
3006 *
3007 * @returns The size of the entry in bytes. That is, 4 or 8.
3008 * @returns If the kind is not for a table, an assertion is raised and 0 is
3009 * returned.
3010 */
3011DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3012{
3013 switch (enmKind)
3014 {
3015 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3016 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3017 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3018 case PGMPOOLKIND_32BIT_PD:
3019 case PGMPOOLKIND_32BIT_PD_PHYS:
3020 return 4;
3021
3022 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3023 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3024 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3025 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3026 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3027 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3028 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3029 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3030 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3031 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3032 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3033 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3034 case PGMPOOLKIND_64BIT_PML4:
3035 case PGMPOOLKIND_PAE_PDPT:
3036 case PGMPOOLKIND_ROOT_NESTED:
3037 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3038 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3039 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3040 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3041 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3042 case PGMPOOLKIND_PAE_PD_PHYS:
3043 case PGMPOOLKIND_PAE_PDPT_PHYS:
3044 return 8;
3045
3046 default:
3047 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3048 }
3049}
3050#endif /* unused */
3051
3052#if 0 /* unused */
3053/**
3054 * Gets the entry size of a guest table.
3055 *
3056 * @param enmKind The kind of page.
3057 *
3058 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3059 * @returns If the kind is not for a table, an assertion is raised and 0 is
3060 * returned.
3061 */
3062DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3063{
3064 switch (enmKind)
3065 {
3066 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3067 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3068 case PGMPOOLKIND_32BIT_PD:
3069 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3070 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3071 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3072 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3073 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3074 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3075 return 4;
3076
3077 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3078 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3079 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3080 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3081 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3082 case PGMPOOLKIND_64BIT_PML4:
3083 case PGMPOOLKIND_PAE_PDPT:
3084 return 8;
3085
3086 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3087 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3088 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3089 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3090 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3091 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3092 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3093 case PGMPOOLKIND_ROOT_NESTED:
3094 case PGMPOOLKIND_PAE_PD_PHYS:
3095 case PGMPOOLKIND_PAE_PDPT_PHYS:
3096 case PGMPOOLKIND_32BIT_PD_PHYS:
3097 /** @todo can we return 0? (nobody is calling this...) */
3098 AssertFailed();
3099 return 0;
3100
3101 default:
3102 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3103 }
3104}
3105#endif /* unused */
3106
3107
3108/**
3109 * Checks one shadow page table entry for a mapping of a physical page.
3110 *
3111 * @returns true / false indicating removal of all relevant PTEs
3112 *
3113 * @param pVM The cross context VM structure.
3114 * @param pPhysPage The guest page in question.
3115 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3116 * @param iShw The shadow page table.
3117 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3118 */
3119static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3120{
3121 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3122 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3123 bool fRet = false;
3124
3125 /*
3126 * Assert sanity.
3127 */
3128 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3129 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3130 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3131
3132 /*
3133 * Then, clear the actual mappings to the page in the shadow PT.
3134 */
3135 switch (pPage->enmKind)
3136 {
3137 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3138 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3139 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3140 {
3141 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3142 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3143 uint32_t u32AndMask = 0;
3144 uint32_t u32OrMask = 0;
3145
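 /* When the caller allows keeping the PTE, only the RW bit is adjusted to match the new
  * handler state; otherwise both masks stay zero and the PTE is zapped below. */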
3146 if (!fFlushPTEs)
3147 {
3148 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3149 {
3150 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3151 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3152 u32OrMask = X86_PTE_RW;
3153 u32AndMask = UINT32_MAX;
3154 fRet = true;
3155 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3156 break;
3157
3158 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3159 u32OrMask = 0;
3160 u32AndMask = ~X86_PTE_RW;
3161 fRet = true;
3162 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3163 break;
3164 default:
3165 /* (shouldn't be here, will assert below) */
3166 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3167 break;
3168 }
3169 }
3170 else
3171 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3172
3173 /* Update the counter if we're removing references. */
3174 if (!u32AndMask)
3175 {
3176 Assert(pPage->cPresent);
3177 Assert(pPool->cPresent);
3178 pPage->cPresent--;
3179 pPool->cPresent--;
3180 }
3181
3182 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3183 {
3184 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3185 X86PTE Pte;
3186 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3187 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3188 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3189 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3190 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3191 return fRet;
3192 }
3193#ifdef LOG_ENABLED
3194 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3195 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3196 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3197 {
3198 Log(("i=%d cFound=%d\n", i, ++cFound));
3199 }
3200#endif
3201 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3202 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3203 break;
3204 }
3205
3206 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3207 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3208 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3209 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3210 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3211 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3212 {
3213 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3214 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3215 uint64_t u64OrMask = 0;
3216 uint64_t u64AndMask = 0;
3217
3218 if (!fFlushPTEs)
3219 {
3220 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3221 {
3222 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3223 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3224 u64OrMask = X86_PTE_RW;
3225 u64AndMask = UINT64_MAX;
3226 fRet = true;
3227 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3228 break;
3229
3230 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3231 u64OrMask = 0;
3232 u64AndMask = ~(uint64_t)X86_PTE_RW;
3233 fRet = true;
3234 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3235 break;
3236
3237 default:
3238 /* (shouldn't be here, will assert below) */
3239 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3240 break;
3241 }
3242 }
3243 else
3244 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3245
3246 /* Update the counter if we're removing references. */
3247 if (!u64AndMask)
3248 {
3249 Assert(pPage->cPresent);
3250 Assert(pPool->cPresent);
3251 pPage->cPresent--;
3252 pPool->cPresent--;
3253 }
3254
3255 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3256 {
3257 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3258 X86PTEPAE Pte;
3259 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3260 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3261 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3262
3263 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3264 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3265 return fRet;
3266 }
3267#ifdef LOG_ENABLED
3268 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3269 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3270 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3271 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3272 Log(("i=%d cFound=%d\n", i, ++cFound));
3273#endif
3274 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3275 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3276 break;
3277 }
3278
3279#ifdef PGM_WITH_LARGE_PAGES
3280 /* Large page case only. */
3281 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3282 {
3283 Assert(pVM->pgm.s.fNestedPaging);
3284
3285 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3286 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3287
3288 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3289 {
3290 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3291 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3292 pPD->a[iPte].u = 0;
3293 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3294
3295 /* Update the counter as we're removing references. */
3296 Assert(pPage->cPresent);
3297 Assert(pPool->cPresent);
3298 pPage->cPresent--;
3299 pPool->cPresent--;
3300
3301 return fRet;
3302 }
3303# ifdef LOG_ENABLED
3304 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3305 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3306 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3307 Log(("i=%d cFound=%d\n", i, ++cFound));
3308# endif
3309 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3310 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3311 break;
3312 }
3313
3314 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3315 case PGMPOOLKIND_PAE_PD_PHYS:
3316 {
3317 Assert(pVM->pgm.s.fNestedPaging);
3318
3319 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3320 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3321
3322 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3323 {
3324 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3325 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3326 pPD->a[iPte].u = 0;
3327 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3328
3329 /* Update the counter as we're removing references. */
3330 Assert(pPage->cPresent);
3331 Assert(pPool->cPresent);
3332 pPage->cPresent--;
3333 pPool->cPresent--;
3334 return fRet;
3335 }
3336# ifdef LOG_ENABLED
3337 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3338 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3339 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3340 Log(("i=%d cFound=%d\n", i, ++cFound));
3341# endif
3342 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3343 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3344 break;
3345 }
3346#endif /* PGM_WITH_LARGE_PAGES */
3347
3348 default:
3349 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3350 }
3351
3352 /* not reached. */
3353#ifndef _MSC_VER
3354 return fRet;
3355#endif
3356}
3357
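/*
 * A non-compiled sketch of the fFlushPTEs decision taken above for the
 * 32-bit PT case.  The helper name is invented for illustration; the masks
 * and handler states mirror the switch in pgmPoolTrackFlushGCPhysPTInt.
 * Kept under '#if 0' like the other unused fragments in this file.
 */
#if 0
static bool pgmPoolSketchKeepOrFlushPte32(PX86PT pPT, unsigned iPte, bool fFlushPTEs, uint8_t uHndlState)
{
    X86PGUINT uPte = pPT->a[iPte].u;
    if (!fFlushPTEs && (uHndlState == PGM_PAGE_HNDL_PHYS_STATE_NONE || uHndlState == PGM_PAGE_HNDL_PHYS_STATE_DISABLED))
        uPte |= X86_PTE_RW;                         /* keep the mapping and restore write access */
    else if (!fFlushPTEs && uHndlState == PGM_PAGE_HNDL_PHYS_STATE_WRITE)
        uPte &= ~(X86PGUINT)X86_PTE_RW;             /* keep the mapping but write-protect it */
    else
        uPte = 0;                                   /* flush: drop the mapping entirely */
    if (uPte & PGM_PTFLAGS_TRACK_DIRTY)
        uPte &= ~(X86PGUINT)X86_PTE_RW;             /* dirty-bit tracking still active: never allow writes */
    ASMAtomicWriteU32(&pPT->a[iPte].u, uPte);
    return uPte != 0;                               /* true: the PTE was kept and the tracking data stays valid */
}
#endif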
3358
3359/**
3360 * Scans one shadow page table for mappings of a physical page.
3361 *
3362 * @param pVM The cross context VM structure.
3363 * @param pPhysPage The guest page in question.
3364 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3365 * @param iShw The shadow page table.
3366 */
3367static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3368{
3369 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3370
3371 /* We should only come here when there's only one reference to this physical page. */
3372 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3373
3374 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3375 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3376 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3377 if (!fKeptPTEs)
3378 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3379 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3380}
3381
3382
3383/**
3384 * Flushes a list of shadow page tables mapping the same physical page.
3385 *
3386 * @param pVM The cross context VM structure.
3387 * @param pPhysPage The guest page in question.
3388 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3389 * @param iPhysExt The physical cross reference extent list to flush.
3390 */
3391static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3392{
3393 PGM_LOCK_ASSERT_OWNER(pVM);
3394 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3395 bool fKeepList = false;
3396
3397 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3398 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3399
3400 const uint16_t iPhysExtStart = iPhysExt;
3401 PPGMPOOLPHYSEXT pPhysExt;
3402 do
3403 {
3404 Assert(iPhysExt < pPool->cMaxPhysExts);
3405 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3406 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3407 {
3408 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3409 {
3410 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3411 if (!fKeptPTEs)
3412 {
3413 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3414 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3415 }
3416 else
3417 fKeepList = true;
3418 }
3419 }
3420 /* next */
3421 iPhysExt = pPhysExt->iNext;
3422 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3423
3424 if (!fKeepList)
3425 {
3426 /* insert the list into the free list and clear the ram range entry. */
3427 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3428 pPool->iPhysExtFreeHead = iPhysExtStart;
3429 /* Invalidate the tracking data. */
3430 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3431 }
3432
3433 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3434}
3435
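/*
 * Note on fKeepList above: an extent slot is only invalidated when the PTE it
 * referred to was actually removed.  As soon as a single PTE is merely
 * write-adjusted and kept, the whole extent chain must survive so that the
 * page's tracking data still describes every remaining shadow mapping.
 */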
3436
3437/**
3438 * Flushes all shadow page table mappings of the given guest page.
3439 *
3440 * This is typically called when the host page backing the guest one has been
3441 * replaced or when the page protection was changed due to a guest access
3442 * caught by the monitoring.
3443 *
3444 * @returns VBox status code.
3445 * @retval VINF_SUCCESS if all references have been successfully cleared.
3446 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3447 * pool cleaning. FF and sync flags are set.
3448 *
3449 * @param pVM The cross context VM structure.
3450 * @param GCPhysPage GC physical address of the page in question
3451 * @param pPhysPage The guest page in question.
3452 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3453 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3454 * flushed; it is NOT touched if this isn't necessary.
3455 * The caller MUST initialize this to @a false.
3456 */
3457int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3458{
3459 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3460 PGM_LOCK_VOID(pVM);
3461 int rc = VINF_SUCCESS;
3462
3463#ifdef PGM_WITH_LARGE_PAGES
3464 /* Is this page part of a large page? */
3465 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3466 {
3467 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3468 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3469
3470 /* Fetch the large page base. */
3471 PPGMPAGE pLargePage;
3472 if (GCPhysBase != GCPhysPage)
3473 {
3474 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3475 AssertFatal(pLargePage);
3476 }
3477 else
3478 pLargePage = pPhysPage;
3479
3480 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3481
3482 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3483 {
3484 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3485 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3486 pVM->pgm.s.cLargePagesDisabled++;
3487
3488 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3489 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3490
3491 *pfFlushTLBs = true;
3492 PGM_UNLOCK(pVM);
3493 return rc;
3494 }
3495 }
3496#else
3497 NOREF(GCPhysPage);
3498#endif /* PGM_WITH_LARGE_PAGES */
3499
3500 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3501 if (u16)
3502 {
3503 /*
3504 * The zero page is currently screwing up the tracking and we'll
3505 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3506 * is defined, zero pages won't normally be mapped. Some kind of solution
3507 * will be needed for this problem of course, but it will have to wait...
3508 */
3509 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3510 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3511 rc = VINF_PGM_GCPHYS_ALIASED;
3512 else
3513 {
3514 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3515 {
3516 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3517 pgmPoolTrackFlushGCPhysPT(pVM,
3518 pPhysPage,
3519 fFlushPTEs,
3520 PGMPOOL_TD_GET_IDX(u16));
3521 }
3522 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3523 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3524 else
3525 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3526 *pfFlushTLBs = true;
3527 }
3528 }
3529
3530 if (rc == VINF_PGM_GCPHYS_ALIASED)
3531 {
3532 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3533 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3534 rc = VINF_PGM_SYNC_CR3;
3535 }
3536 PGM_UNLOCK(pVM);
3537 return rc;
3538}
3539
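/*
 * Condensed, non-compiled view of how the 16-bit tracking word drives the
 * dispatch in pgmPoolTrackUpdateGCPhys above.  The helper name is invented;
 * everything else uses the macros and functions from this file.  The real
 * function additionally takes the PGM lock, handles zero/ballooned pages by
 * requesting a full sync, and sets *pfFlushTLBs.
 */
#if 0
static void pgmPoolSketchTrackingDispatch(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs)
{
    uint16_t const u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
    if (!u16)
        return;                                                                          /* no shadow PT references this page */
    if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
        pgmPoolTrackFlushGCPhysPT(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));  /* exactly one reference */
    else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
        pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16)); /* a few references: extent list */
    else
        pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);                                  /* overflowed: scan every shadow PT */
}
#endif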
3540
3541/**
3542 * Scans all shadow page tables for mappings of a physical page.
3543 *
3544 * This may be slow, but it's most likely more efficient than cleaning
3545 * out the entire page pool / cache.
3546 *
3547 * @returns VBox status code.
3548 * @retval VINF_SUCCESS if all references have been successfully cleared.
3549 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3550 * a page pool cleaning.
3551 *
3552 * @param pVM The cross context VM structure.
3553 * @param pPhysPage The guest page in question.
3554 */
3555int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3556{
3557 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3558 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3559 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3560 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3561
3562 /*
3563 * There is a limit to what makes sense.
3564 */
3565 if ( pPool->cPresent > 1024
3566 && pVM->cCpus == 1)
3567 {
3568 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3569 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3570 return VINF_PGM_GCPHYS_ALIASED;
3571 }
3572
3573 /*
3574 * Iterate all the pages until we've encountered all that are in use.
3575 * This is a simple but not quite optimal solution.
3576 */
3577 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3578 unsigned cLeft = pPool->cUsedPages;
3579 unsigned iPage = pPool->cCurPages;
3580 while (--iPage >= PGMPOOL_IDX_FIRST)
3581 {
3582 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3583 if ( pPage->GCPhys != NIL_RTGCPHYS
3584 && pPage->cPresent)
3585 {
3586 switch (pPage->enmKind)
3587 {
3588 /*
3589 * We only care about shadow page tables.
3590 */
3591 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3592 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3593 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3594 {
3595 const uint32_t u32 = (uint32_t)u64;
3596 unsigned cPresent = pPage->cPresent;
3597 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3598 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3599 {
3600 const X86PGUINT uPte = pPT->a[i].u;
3601 if (uPte & X86_PTE_P)
3602 {
3603 if ((uPte & X86_PTE_PG_MASK) == u32)
3604 {
3605 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3606 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3607
3608 /* Update the counter as we're removing references. */
3609 Assert(pPage->cPresent);
3610 Assert(pPool->cPresent);
3611 pPage->cPresent--;
3612 pPool->cPresent--;
3613 }
3614 if (!--cPresent)
3615 break;
3616 }
3617 }
3618 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3619 break;
3620 }
3621
3622 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3623 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3624 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3625 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3626 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3627 {
3628 unsigned cPresent = pPage->cPresent;
3629 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3630 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3631 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3632 {
3633 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3634 {
3635 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3636 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0); /// @todo why not atomic?
3637
3638 /* Update the counter as we're removing references. */
3639 Assert(pPage->cPresent);
3640 Assert(pPool->cPresent);
3641 pPage->cPresent--;
3642 pPool->cPresent--;
3643 }
3644 if (!--cPresent)
3645 break;
3646 }
3647 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3648 break;
3649 }
3650
3651 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3652 {
3653 unsigned cPresent = pPage->cPresent;
3654 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3655 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3656 {
3657 X86PGPAEUINT const uPte = pPT->a[i].u;
3658 if (uPte & EPT_E_READ)
3659 {
3660 if ((uPte & EPT_PTE_PG_MASK) == u64)
3661 {
3662 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3663 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3664
3665 /* Update the counter as we're removing references. */
3666 Assert(pPage->cPresent);
3667 Assert(pPool->cPresent);
3668 pPage->cPresent--;
3669 pPool->cPresent--;
3670 }
3671 if (!--cPresent)
3672 break;
3673 }
3674 }
3675 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3676 break;
3677 }
3678 }
3679
3680 if (!--cLeft)
3681 break;
3682 }
3683 }
3684
3685 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3686 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3687
3688 /*
3689 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3690 */
3691 if (pPool->cPresent > 1024)
3692 {
3693 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3694 return VINF_PGM_GCPHYS_ALIASED;
3695 }
3696
3697 return VINF_SUCCESS;
3698}
3699
3700
3701/**
3702 * Clears the user entry in a user table.
3703 *
3704 * This is used to remove all references to a page when flushing it.
3705 */
3706static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3707{
3708 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3709 Assert(pUser->iUser < pPool->cCurPages);
3710 uint32_t iUserTable = pUser->iUserTable;
3711
3712 /*
3713 * Map the user page. Ignore references made by fictitious pages.
3714 */
3715 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3716 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3717 union
3718 {
3719 uint64_t *pau64;
3720 uint32_t *pau32;
3721 } u;
3722 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3723 {
3724 Assert(!pUserPage->pvPageR3);
3725 return;
3726 }
3727 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3728
3729
3730 /* Safety precaution in case we change the paging for other modes too in the future. */
3731 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3732
3733#ifdef VBOX_STRICT
3734 /*
3735 * Some sanity checks.
3736 */
3737 switch (pUserPage->enmKind)
3738 {
3739 case PGMPOOLKIND_32BIT_PD:
3740 case PGMPOOLKIND_32BIT_PD_PHYS:
3741 Assert(iUserTable < X86_PG_ENTRIES);
3742 break;
3743 case PGMPOOLKIND_PAE_PDPT:
3744 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3745 case PGMPOOLKIND_PAE_PDPT_PHYS:
3746 Assert(iUserTable < 4);
3747 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3748 break;
3749 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3750 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3751 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3752 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3753 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3754 case PGMPOOLKIND_PAE_PD_PHYS:
3755 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3756 break;
3757 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3758 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3759 break;
3760 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3761 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3762 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3763 break;
3764 case PGMPOOLKIND_64BIT_PML4:
3765 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3766 /* GCPhys >> PAGE_SHIFT is the index here */
3767 break;
3768 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3769 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3770 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3771 break;
3772
3773 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3774 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3775 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3776 break;
3777
3778 case PGMPOOLKIND_ROOT_NESTED:
3779 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3780 break;
3781
3782 default:
3783 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3784 break;
3785 }
3786#endif /* VBOX_STRICT */
3787
3788 /*
3789 * Clear the entry in the user page.
3790 */
3791 switch (pUserPage->enmKind)
3792 {
3793 /* 32-bit entries */
3794 case PGMPOOLKIND_32BIT_PD:
3795 case PGMPOOLKIND_32BIT_PD_PHYS:
3796 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3797 break;
3798
3799 /* 64-bit entries */
3800 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3801 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3802 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3803 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3804 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3805 case PGMPOOLKIND_PAE_PD_PHYS:
3806 case PGMPOOLKIND_PAE_PDPT_PHYS:
3807 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3808 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3809 case PGMPOOLKIND_64BIT_PML4:
3810 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3811 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3812 case PGMPOOLKIND_PAE_PDPT:
3813 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3814 case PGMPOOLKIND_ROOT_NESTED:
3815 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3816 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3817 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3818 break;
3819
3820 default:
3821 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3822 }
3823 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3824}
3825
3826
3827/**
3828 * Clears all users of a page.
3829 */
3830static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3831{
3832 /*
3833 * Free all the user records.
3834 */
3835 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3836
3837 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3838 uint16_t i = pPage->iUserHead;
3839 while (i != NIL_PGMPOOL_USER_INDEX)
3840 {
3841 /* Clear entry in user table. */
3842 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3843
3844 /* Free it. */
3845 const uint16_t iNext = paUsers[i].iNext;
3846 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3847 paUsers[i].iNext = pPool->iUserFreeHead;
3848 pPool->iUserFreeHead = i;
3849
3850 /* Next. */
3851 i = iNext;
3852 }
3853 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3854}
3855
3856
3857/**
3858 * Allocates a new physical cross reference extent.
3859 *
3860 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3861 * @param pVM The cross context VM structure.
3862 * @param piPhysExt Where to store the phys ext index.
3863 */
3864PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
3865{
3866 PGM_LOCK_ASSERT_OWNER(pVM);
3867 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3868 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3869 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3870 {
3871 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3872 return NULL;
3873 }
3874 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3875 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3876 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3877 *piPhysExt = iPhysExt;
3878 return pPhysExt;
3879}
3880
3881
3882/**
3883 * Frees a physical cross reference extent.
3884 *
3885 * @param pVM The cross context VM structure.
3886 * @param iPhysExt The extent to free.
3887 */
3888void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
3889{
3890 PGM_LOCK_ASSERT_OWNER(pVM);
3891 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3892 Assert(iPhysExt < pPool->cMaxPhysExts);
3893 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3894 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3895 {
3896 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3897 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3898 }
3899 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3900 pPool->iPhysExtFreeHead = iPhysExt;
3901}
3902
3903
3904/**
3905 * Frees a whole list of physical cross reference extents.
3906 *
3907 * @param pVM The cross context VM structure.
3908 * @param iPhysExt The head of the extent list to free.
3909 */
3910void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
3911{
3912 PGM_LOCK_ASSERT_OWNER(pVM);
3913 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3914
3915 const uint16_t iPhysExtStart = iPhysExt;
3916 PPGMPOOLPHYSEXT pPhysExt;
3917 do
3918 {
3919 Assert(iPhysExt < pPool->cMaxPhysExts);
3920 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3921 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3922 {
3923 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3924 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3925 }
3926
3927 /* next */
3928 iPhysExt = pPhysExt->iNext;
3929 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3930
3931 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3932 pPool->iPhysExtFreeHead = iPhysExtStart;
3933}
3934
3935
3936/**
3937 * Insert a reference into a list of physical cross reference extents.
3938 *
3939 * @returns The new tracking data for PGMPAGE.
3940 *
3941 * @param pVM The cross context VM structure.
3942 * @param iPhysExt The physical extent index of the list head.
3943 * @param iShwPT The shadow page table index.
3944 * @param iPte Page table entry
3945 *
3946 */
3947static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3948{
3949 PGM_LOCK_ASSERT_OWNER(pVM);
3950 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3951 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3952
3953 /*
3954 * Special common cases.
3955 */
3956 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3957 {
3958 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3959 paPhysExts[iPhysExt].apte[1] = iPte;
3960 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3961 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3962 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3963 }
3964 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3965 {
3966 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3967 paPhysExts[iPhysExt].apte[2] = iPte;
3968 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3969 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3970 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3971 }
3972 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3973
3974 /*
3975 * General treatment.
3976 */
3977 const uint16_t iPhysExtStart = iPhysExt;
3978 unsigned cMax = 15;
3979 for (;;)
3980 {
3981 Assert(iPhysExt < pPool->cMaxPhysExts);
3982 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3983 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3984 {
3985 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3986 paPhysExts[iPhysExt].apte[i] = iPte;
3987 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3988 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3989 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3990 }
3991 if (!--cMax)
3992 {
3993 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
3994 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3995 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3996 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3997 }
3998
3999 /* advance */
4000 iPhysExt = paPhysExts[iPhysExt].iNext;
4001 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4002 break;
4003 }
4004
4005 /*
4006 * Add another extent to the list.
4007 */
4008 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4009 if (!pNew)
4010 {
4011 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4012 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4013 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4014 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4015 }
4016 pNew->iNext = iPhysExtStart;
4017 pNew->aidx[0] = iShwPT;
4018 pNew->apte[0] = iPte;
4019 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4020 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4021}
4022
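/*
 * A loose sketch of what a physical cross-reference chain may look like after
 * a few insertions; the field and macro names follow the code above, the
 * concrete index values are made up:
 *
 *   PGMPAGE tracking word:  PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, 7)
 *
 *   paPhysExts[7] = { aidx = { 12, 45, 46 }, apte = { 0x1a3, 0x1a3, 0x1a3 }, iNext = 9 }
 *   paPhysExts[9] = { aidx = { 47, NIL, NIL }, apte = { 0x1a3, NIL, NIL },   iNext = NIL_PGMPOOL_PHYSEXT_INDEX }
 *
 *   (NIL above stands for NIL_PGMPOOL_IDX / NIL_PGMPOOL_PHYSEXT_IDX_PTE.)
 *
 * pgmPoolTrackPhysExtInsert walks the chain (at most 15 extents) looking for
 * a free slot, prepends a fresh extent when it runs off the end, and past the
 * limit gives up: the chain is freed and PGMPOOL_TD_IDX_OVERFLOWED is stored
 * instead, forcing the slow scan on the next flush.
 */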
4023
4024/**
4025 * Add a reference to a guest physical page where extents are in use.
4026 *
4027 * @returns The new tracking data for PGMPAGE.
4028 *
4029 * @param pVM The cross context VM structure.
4030 * @param pPhysPage Pointer to the aPages entry in the ram range.
4031 * @param u16 The ram range flags (top 16-bits).
4032 * @param iShwPT The shadow page table index.
4033 * @param iPte Page table entry
4034 */
4035uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4036{
4037 PGM_LOCK_VOID(pVM);
4038 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4039 {
4040 /*
4041 * Convert to extent list.
4042 */
4043 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4044 uint16_t iPhysExt;
4045 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4046 if (pPhysExt)
4047 {
4048 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4049 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4050 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4051 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4052 pPhysExt->aidx[1] = iShwPT;
4053 pPhysExt->apte[1] = iPte;
4054 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4055 }
4056 else
4057 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4058 }
4059 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4060 {
4061 /*
4062 * Insert into the extent list.
4063 */
4064 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4065 }
4066 else
4067 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4068 PGM_UNLOCK(pVM);
4069 return u16;
4070}
4071
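/*
 * Non-compiled round trip through the tracking-data accessors used above,
 * assuming PGMPOOL_TD_MAKE and the PGMPOOL_TD_GET_* macros (PGMInternal.h)
 * are exact inverses for in-range values.  The index values are arbitrary.
 */
#if 0
static void pgmPoolSketchTrackingWord(void)
{
    /* Single reference: the shadow pool page index is stored directly. */
    uint16_t const u16One = PGMPOOL_TD_MAKE(1, /* iShwPT = */ 42);
    Assert(PGMPOOL_TD_GET_CREFS(u16One) == 1 && PGMPOOL_TD_GET_IDX(u16One) == 42);

    /* Multiple references: the index points into the paPhysExts array instead. */
    uint16_t const u16Many = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, /* iPhysExt = */ 7);
    Assert(PGMPOOL_TD_GET_CREFS(u16Many) == PGMPOOL_TD_CREFS_PHYSEXT && PGMPOOL_TD_GET_IDX(u16Many) == 7);
}
#endif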
4072
4073/**
4074 * Clear references to guest physical memory.
4075 *
4076 * @param pPool The pool.
4077 * @param pPage The page.
4078 * @param pPhysPage Pointer to the aPages entry in the ram range.
4079 * @param iPte Shadow PTE index
4080 */
4081void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4082{
4083 PVMCC pVM = pPool->CTX_SUFF(pVM);
4084 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4085 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4086
4087 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4088 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4089 {
4090 PGM_LOCK_VOID(pVM);
4091
4092 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4093 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4094 do
4095 {
4096 Assert(iPhysExt < pPool->cMaxPhysExts);
4097
4098 /*
4099 * Look for the shadow page and check if it's all freed.
4100 */
4101 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4102 {
4103 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4104 && paPhysExts[iPhysExt].apte[i] == iPte)
4105 {
4106 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4107 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4108
4109 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4110 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4111 {
4112 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4113 PGM_UNLOCK(pVM);
4114 return;
4115 }
4116
4117 /* we can free the node. */
4118 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4119 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4120 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4121 {
4122 /* lonely node */
4123 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4124 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4125 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4126 }
4127 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4128 {
4129 /* head */
4130 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4131 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4132 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4133 }
4134 else
4135 {
4136 /* in list */
4137 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4138 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4139 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4140 }
4141 iPhysExt = iPhysExtNext;
4142 PGM_UNLOCK(pVM);
4143 return;
4144 }
4145 }
4146
4147 /* next */
4148 iPhysExtPrev = iPhysExt;
4149 iPhysExt = paPhysExts[iPhysExt].iNext;
4150 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4151
4152 PGM_UNLOCK(pVM);
4153 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4154 }
4155 else /* nothing to do */
4156 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4157}
4158
4159/**
4160 * Clear references to guest physical memory.
4161 *
4162 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4163 * physical address is assumed to be correct, so the linear search can be
4164 * skipped and we can assert at an earlier point.
4165 *
4166 * @param pPool The pool.
4167 * @param pPage The page.
4168 * @param HCPhys The host physical address corresponding to the guest page.
4169 * @param GCPhys The guest physical address corresponding to HCPhys.
4170 * @param iPte Shadow PTE index
4171 */
4172static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4173{
4174 /*
4175 * Lookup the page and check if it checks out before derefing it.
4176 */
4177 PVMCC pVM = pPool->CTX_SUFF(pVM);
4178 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4179 if (pPhysPage)
4180 {
4181 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4182#ifdef LOG_ENABLED
4183 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4184 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4185#endif
4186 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4187 {
4188 Assert(pPage->cPresent);
4189 Assert(pPool->cPresent);
4190 pPage->cPresent--;
4191 pPool->cPresent--;
4192 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4193 return;
4194 }
4195
4196 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4197 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4198 }
4199 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4200}
4201
4202
4203/**
4204 * Clear references to guest physical memory.
4205 *
4206 * @param pPool The pool.
4207 * @param pPage The page.
4208 * @param HCPhys The host physical address corresponding to the guest page.
4209 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4210 * @param iPte Shadow pte index
4211 */
4212void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4213{
4214 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4215
4216 /*
4217 * Try the hint first.
4218 */
4219 RTHCPHYS HCPhysHinted;
4220 PVMCC pVM = pPool->CTX_SUFF(pVM);
4221 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4222 if (pPhysPage)
4223 {
4224 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4225 Assert(HCPhysHinted);
4226 if (HCPhysHinted == HCPhys)
4227 {
4228 Assert(pPage->cPresent);
4229 Assert(pPool->cPresent);
4230 pPage->cPresent--;
4231 pPool->cPresent--;
4232 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4233 return;
4234 }
4235 }
4236 else
4237 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4238
4239 /*
4240 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4241 */
4242 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4243 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4244 while (pRam)
4245 {
4246 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4247 while (iPage-- > 0)
4248 {
4249 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4250 {
4251 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4252 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4253 Assert(pPage->cPresent);
4254 Assert(pPool->cPresent);
4255 pPage->cPresent--;
4256 pPool->cPresent--;
4257 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4258 return;
4259 }
4260 }
4261 pRam = pRam->CTX_SUFF(pNext);
4262 }
4263
4264 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4265}
4266
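/*
 * Note: the hint comes from the guest PTE at the time of the call, whereas
 * HCPhys comes from the shadow PTE being torn down.  If the guest has since
 * repointed its PTE, the two no longer match and the RAM range walk above is
 * the only way to locate the PGMPAGE that owns HCPhys.
 */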
4267
4268/**
4269 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4270 *
4271 * @param pPool The pool.
4272 * @param pPage The page.
4273 * @param pShwPT The shadow page table (mapping of the page).
4274 * @param pGstPT The guest page table.
4275 */
4276DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4277{
4278 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4279 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4280 {
4281 const X86PGUINT uPte = pShwPT->a[i].u;
4282 Assert(!(uPte & RT_BIT_32(10)));
4283 if (uPte & X86_PTE_P)
4284 {
4285 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4286 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4287 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4288 if (!pPage->cPresent)
4289 break;
4290 }
4291 }
4292}
4293
4294
4295/**
4296 * Clear references to guest physical memory in a PAE / 32-bit page table.
4297 *
4298 * @param pPool The pool.
4299 * @param pPage The page.
4300 * @param pShwPT The shadow page table (mapping of the page).
4301 * @param pGstPT The guest page table (just a half one).
4302 */
4303DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4304{
4305 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4306 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4307 {
4308 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4309 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4310 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4311 {
4312 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4313 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4314 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4315 if (!pPage->cPresent)
4316 break;
4317 }
4318 }
4319}
4320
4321
4322/**
4323 * Clear references to guest physical memory in a PAE / PAE page table.
4324 *
4325 * @param pPool The pool.
4326 * @param pPage The page.
4327 * @param pShwPT The shadow page table (mapping of the page).
4328 * @param pGstPT The guest page table.
4329 */
4330DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4331{
4332 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4333 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4334 {
4335 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4336 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4337 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4338 {
4339 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4340 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4341 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4342 if (!pPage->cPresent)
4343 break;
4344 }
4345 }
4346}
4347
4348
4349/**
4350 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4351 *
4352 * @param pPool The pool.
4353 * @param pPage The page.
4354 * @param pShwPT The shadow page table (mapping of the page).
4355 */
4356DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4357{
4358 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4359 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4360 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4361 {
4362 const X86PGUINT uPte = pShwPT->a[i].u;
4363 Assert(!(uPte & RT_BIT_32(10)));
4364 if (uPte & X86_PTE_P)
4365 {
4366 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4367 i, uPte & X86_PTE_PG_MASK, GCPhys));
4368 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4369 if (!pPage->cPresent)
4370 break;
4371 }
4372 }
4373}
4374
4375
4376/**
4377 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4378 *
4379 * @param pPool The pool.
4380 * @param pPage The page.
4381 * @param pShwPT The shadow page table (mapping of the page).
4382 */
4383DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4384{
4385 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4386 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4387 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4388 {
4389 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4390 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4391 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4392 {
4393 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4394 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4395 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4396 if (!pPage->cPresent)
4397 break;
4398 }
4399 }
4400}
4401
4402
4403/**
4404 * Clear references to shadowed pages in an EPT page table.
4405 *
4406 * @param pPool The pool.
4407 * @param pPage The page.
4408 * @param pShwPT The shadow page table (mapping of the
4409 * page).
4410 */
4411DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4412{
4413 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4414 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4415 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4416 {
4417 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4418 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4419 if (uPte & EPT_E_READ)
4420 {
4421 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4422 i, uPte & EPT_PTE_PG_MASK, pPage->GCPhys));
4423 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4424 if (!pPage->cPresent)
4425 break;
4426 }
4427 }
4428}
4429
4430
4431/**
4432 * Clear references to shadowed pages in a 32-bit page directory.
4433 *
4434 * @param pPool The pool.
4435 * @param pPage The page.
4436 * @param pShwPD The shadow page directory (mapping of the page).
4437 */
4438DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4439{
4440 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4441 {
4442 X86PGUINT const uPde = pShwPD->a[i].u;
4443 if (uPde & X86_PDE_P)
4444 {
4445 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4446 if (pSubPage)
4447 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4448 else
4449 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4450 }
4451 }
4452}
4453
4454
4455/**
4456 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4457 *
4458 * @param pPool The pool.
4459 * @param pPage The page.
4460 * @param pShwPD The shadow page directory (mapping of the page).
4461 */
4462DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4463{
4464 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4465 {
4466 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4467 if (uPde & X86_PDE_P)
4468 {
4469#ifdef PGM_WITH_LARGE_PAGES
4470 if (uPde & X86_PDE_PS)
4471 {
4472 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4473 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4474 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4475 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4476 i);
4477 }
4478 else
4479#endif
4480 {
4481 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4482 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4483 if (pSubPage)
4484 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4485 else
4486 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4487 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4488 }
4489 }
4490 }
4491}
4492
4493
4494/**
4495 * Clear references to shadowed pages in a PAE page directory pointer table.
4496 *
4497 * @param pPool The pool.
4498 * @param pPage The page.
4499 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4500 */
4501DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4502{
4503 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4504 {
4505 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4506 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4507 if (uPdpe & X86_PDPE_P)
4508 {
4509 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4510 if (pSubPage)
4511 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4512 else
4513 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4514 }
4515 }
4516}
4517
4518
4519/**
4520 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4521 *
4522 * @param pPool The pool.
4523 * @param pPage The page.
4524 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4525 */
4526DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4527{
4528 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4529 {
4530 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4531 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4532 if (uPdpe & X86_PDPE_P)
4533 {
4534 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4535 if (pSubPage)
4536 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4537 else
4538 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4539 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4540 }
4541 }
4542}
4543
4544
4545/**
4546 * Clear references to shadowed pages in a 64-bit level 4 page table.
4547 *
4548 * @param pPool The pool.
4549 * @param pPage The page.
4550 * @param pShwPML4 The shadow page map level 4 table (mapping of the page).
4551 */
4552DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4553{
4554 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4555 {
4556 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4557 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4558 if (uPml4e & X86_PML4E_P)
4559 {
4560 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4561 if (pSubPage)
4562 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4563 else
4564 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4565 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4566 }
4567 }
4568}
4569
4570
4571/**
4572 * Clear references to shadowed pages in an EPT page directory.
4573 *
4574 * @param pPool The pool.
4575 * @param pPage The page.
4576 * @param pShwPD The shadow page directory (mapping of the page).
4577 */
4578DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4579{
4580 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4581 {
4582 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4583 Assert((uPde & UINT64_C(0xfff0000000000f80)) == 0);
4584 if (uPde & EPT_E_READ)
4585 {
4586#ifdef PGM_WITH_LARGE_PAGES
4587 if (uPde & EPT_E_LEAF)
4588 {
4589 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4590 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4591 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4592 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4593 i);
4594 }
4595 else
4596#endif
4597 {
4598 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4599 if (pSubPage)
4600 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4601 else
4602 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4603 }
4604 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4605 }
4606 }
4607}
4608
4609
4610/**
4611 * Clear references to shadowed pages in an EPT page directory pointer table.
4612 *
4613 * @param pPool The pool.
4614 * @param pPage The page.
4615 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4616 */
4617DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4618{
4619 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4620 {
4621 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4622 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
4623 if (uPdpe & EPT_E_READ)
4624 {
4625 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
4626 if (pSubPage)
4627 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4628 else
4629 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
4630 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4631 }
4632 }
4633}
4634
4635
4636/**
4637 * Clears all references made by this page.
4638 *
4639 * This includes other shadow pages and GC physical addresses.
4640 *
4641 * @param pPool The pool.
4642 * @param pPage The page.
4643 */
4644static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4645{
4646 /*
4647 * Map the shadow page and take action according to the page kind.
4648 */
4649 PVMCC pVM = pPool->CTX_SUFF(pVM);
4650 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4651 switch (pPage->enmKind)
4652 {
4653 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4654 {
4655 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4656 void *pvGst;
4657 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4658 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4659 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4660 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4661 break;
4662 }
4663
4664 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4665 {
4666 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4667 void *pvGst;
4668 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4669 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4670 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4671 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4672 break;
4673 }
4674
4675 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4676 {
4677 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4678 void *pvGst;
4679 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4680 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4681 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4682 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4683 break;
4684 }
4685
4686 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4687 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4688 {
4689 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4690 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4691 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4692 break;
4693 }
4694
4695 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4696 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4697 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4698 {
4699 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4700 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4701 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4702 break;
4703 }
4704
4705 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4706 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4707 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4708 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4709 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4710 case PGMPOOLKIND_PAE_PD_PHYS:
4711 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4712 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4713 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4714 break;
4715
4716 case PGMPOOLKIND_32BIT_PD_PHYS:
4717 case PGMPOOLKIND_32BIT_PD:
4718 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4719 break;
4720
4721 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4722 case PGMPOOLKIND_PAE_PDPT:
4723 case PGMPOOLKIND_PAE_PDPT_PHYS:
4724 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4725 break;
4726
4727 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4728 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4729 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4730 break;
4731
4732 case PGMPOOLKIND_64BIT_PML4:
4733 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4734 break;
4735
4736 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4737 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4738 break;
4739
4740 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4741 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4742 break;
4743
4744 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4745 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4746 break;
4747
4748 default:
4749 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4750 }
4751
4752 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4753 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4754 ASMMemZeroPage(pvShw);
4755 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4756 pPage->fZeroed = true;
4757 Assert(!pPage->cPresent);
4758 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4759}
4760
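/*
 * Summary of the dispatch above: the *_PT_* kinds are leaves and release
 * their references to guest physical pages (pgmPoolTracDerefGCPhys and
 * pgmPoolTracDerefGCPhysHint), which may require mapping the guest page
 * table to obtain address hints; the PD/PDPT/PML4 kinds are interior nodes
 * and instead release their references to other pool pages through
 * pgmPoolTrackFreeUser.  The shadow page is then zeroed so the next
 * allocation gets a clean page.
 */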
4761
4762/**
4763 * Flushes a pool page.
4764 *
4765 * This moves the page to the free list after removing all user references to it.
4766 *
4767 * @returns VBox status code.
4768 * @retval VINF_SUCCESS on success.
4769 * @param pPool The pool.
4770 * @param pPage The shadow page.
4771 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4772 */
4773int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4774{
4775 PVMCC pVM = pPool->CTX_SUFF(pVM);
4776 bool fFlushRequired = false;
4777
4778 int rc = VINF_SUCCESS;
4779 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4780 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4781 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4782
4783 /*
4784 * Reject any attempts at flushing any of the special root pages (shall
4785 * not happen).
4786 */
4787 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4788 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4789 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4790 VINF_SUCCESS);
4791
4792 PGM_LOCK_VOID(pVM);
4793
4794 /*
4795 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4796 */
4797 if (pgmPoolIsPageLocked(pPage))
4798 {
4799 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4800 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4801 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4802 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4803 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4804 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4805 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4806 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4807 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4808 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4809 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d)\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4810 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4811 PGM_UNLOCK(pVM);
4812 return VINF_SUCCESS;
4813 }
4814
4815 /*
4816 * Mark the page as being in need of an ASMMemZeroPage().
4817 */
4818 pPage->fZeroed = false;
4819
4820#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4821 if (pPage->fDirty)
4822 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4823#endif
4824
4825 /* If there are any users of this table, then we *must* issue a TLB flush on all VCPUs. */
4826 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4827 fFlushRequired = true;
4828
4829 /*
4830 * Clear the page.
4831 */
4832 pgmPoolTrackClearPageUsers(pPool, pPage);
4833 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4834 pgmPoolTrackDeref(pPool, pPage);
4835 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4836
4837 /*
4838 * Flush it from the cache.
4839 */
4840 pgmPoolCacheFlushPage(pPool, pPage);
4841
4842 /*
4843 * Deregister the monitoring.
4844 */
4845 if (pPage->fMonitored)
4846 rc = pgmPoolMonitorFlush(pPool, pPage);
4847
4848 /*
4849 * Free the page.
4850 */
4851 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4852 pPage->iNext = pPool->iFreeHead;
4853 pPool->iFreeHead = pPage->idx;
4854 pPage->enmKind = PGMPOOLKIND_FREE;
4855 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4856 pPage->GCPhys = NIL_RTGCPHYS;
4857 pPage->fReusedFlushPending = false;
4858
4859 pPool->cUsedPages--;
4860
4861 /* Flush the TLBs of all VCPUs if required. */
4862 if ( fFlushRequired
4863 && fFlush)
4864 {
4865 PGM_INVL_ALL_VCPU_TLBS(pVM);
4866 }
4867
4868 PGM_UNLOCK(pVM);
4869 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4870 return rc;
4871}
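
/*
 * Illustrative sketch (hypothetical caller, not taken from this file): a caller
 * that has determined a shadow page is stale and wants it back on the free list,
 * including the TLB shootdown, would do roughly the following, where the final
 * 'true' is the fFlush argument.
 * @code
 *     int rc = pgmPoolFlushPage(pPool, pPage, true);
 *     AssertRC(rc);
 * @endcode
 * Passing false for fFlush is only safe when the caller performs the required
 * TLB invalidation on all VCPUs itself, as the warning in the comment above says.
 */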
4872
4873
4874/**
4875 * Frees a usage of a pool page.
4876 *
4877 * The caller is responsible for updating the user table so that it no longer
4878 * references the shadow page.
4879 *
4880 * @param pPool The pool.
4881 * @param pPage The shadow page.
4882 * @param iUser The shadow page pool index of the user table.
4883 * NIL_PGMPOOL_IDX for root pages.
4884 * @param iUserTable The index into the user table (shadowed). Ignored if
4885 * root page.
4886 */
4887void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4888{
4889 PVMCC pVM = pPool->CTX_SUFF(pVM);
4890
4891 STAM_PROFILE_START(&pPool->StatFree, a);
4892 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4893 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4894 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
4895
4896 PGM_LOCK_VOID(pVM);
4897 if (iUser != NIL_PGMPOOL_IDX)
4898 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4899 if (!pPage->fCached)
4900 pgmPoolFlushPage(pPool, pPage);
4901 PGM_UNLOCK(pVM);
4902 STAM_PROFILE_STOP(&pPool->StatFree, a);
4903}
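
/*
 * Illustrative sketch (hypothetical names, not from this file): when a shadow
 * page directory entry is torn down, the owner clears its entry first and then
 * drops the reference, along these lines.
 * @code
 *     pShwPde->a[iPde].u = 0;
 *     pgmPoolFreeByPage(pPool, pSubPage, pShwPdePage->idx, iPde);
 * @endcode
 * pShwPde, pSubPage, pShwPdePage and iPde are placeholders; clearing the entry
 * first satisfies the requirement above that the user table no longer
 * references the shadow page when this function is called.
 */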
4904
4905
4906/**
4907 * Makes sure one or more free pages are available.
4908 *
4909 * @returns VBox status code.
4910 * @retval VINF_SUCCESS on success.
4911 *
4912 * @param pPool The pool.
4913 * @param enmKind Page table kind
4914 * @param iUser The user of the page.
4915 */
4916static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4917{
4918 PVMCC pVM = pPool->CTX_SUFF(pVM);
4919 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
4920 NOREF(enmKind);
4921
4922 /*
4923 * If the pool isn't fully grown yet, expand it.
4924 */
4925 if (pPool->cCurPages < pPool->cMaxPages)
4926 {
4927 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4928#ifdef IN_RING3
4929 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
4930#else
4931 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
4932#endif
4933 if (RT_FAILURE(rc))
4934 return rc;
4935 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4936 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4937 return VINF_SUCCESS;
4938 }
4939
4940 /*
4941 * Free one cached page.
4942 */
4943 return pgmPoolCacheFreeOne(pPool, iUser);
4944}
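
/*
 * Usage sketch (mirrors the allocator below, shown only for illustration): this
 * helper is invoked when the free list has run dry.
 * @code
 *     if (pPool->iFreeHead == NIL_PGMPOOL_IDX)
 *     {
 *         int rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
 *         if (RT_FAILURE(rc))
 *             return rc;
 *     }
 * @endcode
 * It either grows the pool (ring-3/ring-0 grow call) or evicts one cached page
 * via pgmPoolCacheFreeOne().
 */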
4945
4946
4947/**
4948 * Allocates a page from the pool.
4949 *
4950 * This page may actually be a cached page and not in need of any processing
4951 * on the caller's part.
4952 *
4953 * @returns VBox status code.
4954 * @retval VINF_SUCCESS if a NEW page was allocated.
4955 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4956 *
4957 * @param pVM The cross context VM structure.
4958 * @param GCPhys The GC physical address of the page we're going to shadow.
4959 * For 4MB and 2MB PD entries, it's the first address the
4960 * shadow PT is covering.
4961 * @param enmKind The kind of mapping.
4962 * @param enmAccess Access type for the mapping (only relevant for big pages)
4963 * @param fA20Enabled Whether the A20 gate is enabled or not.
4964 * @param iUser The shadow page pool index of the user table. Root
4965 * pages should pass NIL_PGMPOOL_IDX.
4966 * @param iUserTable The index into the user table (shadowed). Ignored for
4967 * root pages (iUser == NIL_PGMPOOL_IDX).
4968 * @param fLockPage Lock the page
4969 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4970 */
4971int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
4972 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
4973{
4974 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4975 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4976 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4977 *ppPage = NULL;
4978 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4979 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4980 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4981
4982 PGM_LOCK_VOID(pVM);
4983
4984 if (pPool->fCacheEnabled)
4985 {
4986 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
4987 if (RT_SUCCESS(rc2))
4988 {
4989 if (fLockPage)
4990 pgmPoolLockPage(pPool, *ppPage);
4991 PGM_UNLOCK(pVM);
4992 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4993 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4994 return rc2;
4995 }
4996 }
4997
4998 /*
4999 * Allocate a new one.
5000 */
5001 int rc = VINF_SUCCESS;
5002 uint16_t iNew = pPool->iFreeHead;
5003 if (iNew == NIL_PGMPOOL_IDX)
5004 {
5005 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5006 if (RT_FAILURE(rc))
5007 {
5008 PGM_UNLOCK(pVM);
5009 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5010 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5011 return rc;
5012 }
5013 iNew = pPool->iFreeHead;
5014 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5015 }
5016
5017 /* unlink the free head */
5018 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5019 pPool->iFreeHead = pPage->iNext;
5020 pPage->iNext = NIL_PGMPOOL_IDX;
5021
5022 /*
5023 * Initialize it.
5024 */
5025 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5026 pPage->enmKind = enmKind;
5027 pPage->enmAccess = enmAccess;
5028 pPage->GCPhys = GCPhys;
5029 pPage->fA20Enabled = fA20Enabled;
5030 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5031 pPage->fMonitored = false;
5032 pPage->fCached = false;
5033 pPage->fDirty = false;
5034 pPage->fReusedFlushPending = false;
5035 pPage->cModifications = 0;
5036 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5037 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5038 pPage->cPresent = 0;
5039 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5040 pPage->idxDirtyEntry = 0;
5041 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5042 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5043 pPage->cLastAccessHandler = 0;
5044 pPage->cLocked = 0;
5045# ifdef VBOX_STRICT
5046 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5047# endif
5048
5049 /*
5050 * Insert into the tracking and cache. If this fails, free the page.
5051 */
5052 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5053 if (RT_FAILURE(rc3))
5054 {
5055 pPool->cUsedPages--;
5056 pPage->enmKind = PGMPOOLKIND_FREE;
5057 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5058 pPage->GCPhys = NIL_RTGCPHYS;
5059 pPage->iNext = pPool->iFreeHead;
5060 pPool->iFreeHead = pPage->idx;
5061 PGM_UNLOCK(pVM);
5062 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5063 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5064 return rc3;
5065 }
5066
5067 /*
5068 * Commit the allocation, clear the page and return.
5069 */
5070#ifdef VBOX_WITH_STATISTICS
5071 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5072 pPool->cUsedPagesHigh = pPool->cUsedPages;
5073#endif
5074
5075 if (!pPage->fZeroed)
5076 {
5077 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5078 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5079 ASMMemZeroPage(pv);
5080 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5081 }
5082
5083 *ppPage = pPage;
5084 if (fLockPage)
5085 pgmPoolLockPage(pPool, pPage);
5086 PGM_UNLOCK(pVM);
5087 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5088 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5089 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5090 return rc;
5091}
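
/*
 * Illustrative sketch (hypothetical arguments, not from this file): allocating
 * a shadow page table for a guest PAE page table might look like this.
 * @code
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysGstPt, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
 *                           true, pPdePage->idx, iPde, false, &pShwPage);
 *     AssertRCReturn(rc, rc);
 * @endcode
 * GCPhysGstPt, pPdePage and iPde are placeholders for the guest PT address and
 * the owning shadow PD page/entry; the two bool arguments are fA20Enabled and
 * fLockPage. A VINF_PGM_CACHED_PAGE return means the shadow PT already existed
 * and needs no re-population, while VINF_SUCCESS hands back a fresh, zeroed page.
 */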
5092
5093
5094/**
5095 * Frees a usage of a pool page.
5096 *
5097 * @param pVM The cross context VM structure.
5098 * @param HCPhys The HC physical address of the shadow page.
5099 * @param iUser The shadow page pool index of the user table.
5100 * NIL_PGMPOOL_IDX if root page.
5101 * @param iUserTable The index into the user table (shadowed). Ignored if
5102 * root page.
5103 */
5104void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5105{
5106 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5107 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5108 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5109}
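
/*
 * Usage sketch (hypothetical names): convenience wrapper for callers that only
 * hold the host physical address of the shadow page, typically taken from a
 * shadow paging structure entry.
 * @code
 *     pgmPoolFree(pVM, HCPhysShwPt, pUserPage->idx, iUserEntry);
 * @endcode
 * HCPhysShwPt, pUserPage and iUserEntry are placeholders; internally this just
 * resolves the page with pgmPoolGetPage() and hands it to pgmPoolFreeByPage().
 */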
5110
5111
5112/**
5113 * Internal worker for finding an 'in-use' shadow page given its physical address.
5114 *
5115 * @returns Pointer to the shadow page structure.
5116 * @param pPool The pool.
5117 * @param HCPhys The HC physical address of the shadow page.
5118 */
5119PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5120{
5121 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5122
5123 /*
5124 * Look up the page.
5125 */
5126 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5127
5128 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5129 return pPage;
5130}
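
/*
 * Usage sketch (hypothetical variable names): the raw value of a shadow PDE can
 * be passed almost as-is, because the lookup applies X86_PTE_PAE_PG_MASK itself,
 * so any flag bits in the entry do not matter.
 * @code
 *     PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeShw.u & X86_PTE_PAE_PG_MASK);
 * @endcode
 * A miss, or a page still marked PGMPOOLKIND_FREE, triggers a fatal assertion,
 * so this is strictly for pages known to be in use.
 */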
5131
5132
5133/**
5134 * Internal worker for finding a page for debugging purposes, no assertions.
5135 *
5136 * @returns Pointer to the shadow page structure. NULL if not found.
5137 * @param pPool The pool.
5138 * @param HCPhys The HC physical address of the shadow page.
5139 */
5140PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5141{
5142 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5143 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5144}
5145
5146
5147/**
5148 * Internal worker for PGM_HCPHYS_2_PTR.
5149 *
5150 * @returns VBox status code.
5151 * @param pVM The cross context VM structure.
5152 * @param HCPhys The HC physical address of the shadow page.
5153 * @param ppv Where to return the address.
5154 */
5155int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5156{
5157 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5158 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5159 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5160 VERR_PGM_POOL_GET_PAGE_FAILED);
5161 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5162 return VINF_SUCCESS;
5163}
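
/*
 * Usage sketch (hypothetical HCPhys value): mapping an arbitrary byte inside a
 * shadow page from its host physical address.
 * @code
 *     void *pv;
 *     int rc = pgmPoolHCPhys2Ptr(pVM, HCPhys, &pv);
 *     AssertRCReturn(rc, rc);
 * @endcode
 * The page offset of HCPhys is preserved in the returned pointer, so pv points
 * at the exact byte, not merely at the start of the shadow page.
 */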
5164
5165#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5166
5167/**
5168 * Flush the specified page if present
5169 *
5170 * @param pVM The cross context VM structure.
5171 * @param GCPhys Guest physical address of the page to flush
5172 */
5173void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5174{
5175 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5176
5177 VM_ASSERT_EMT(pVM);
5178
5179 /*
5180 * Look up the GCPhys in the hash.
5181 */
5182 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5183 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5184 if (i == NIL_PGMPOOL_IDX)
5185 return;
5186
5187 do
5188 {
5189 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5190 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5191 {
5192 switch (pPage->enmKind)
5193 {
5194 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5195 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5196 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5197 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5198 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5199 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5200 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5201 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5202 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5203 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5204 case PGMPOOLKIND_64BIT_PML4:
5205 case PGMPOOLKIND_32BIT_PD:
5206 case PGMPOOLKIND_PAE_PDPT:
5207 {
5208 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5209# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5210 if (pPage->fDirty)
5211 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5212 else
5213# endif
5214 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5215 Assert(!pgmPoolIsPageLocked(pPage));
5216 pgmPoolMonitorChainFlush(pPool, pPage);
5217 return;
5218 }
5219
5220 /* ignore, no monitoring. */
5221 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5222 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5223 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5224 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5225 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5226 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5227 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5228 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5229 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5230 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5231 case PGMPOOLKIND_ROOT_NESTED:
5232 case PGMPOOLKIND_PAE_PD_PHYS:
5233 case PGMPOOLKIND_PAE_PDPT_PHYS:
5234 case PGMPOOLKIND_32BIT_PD_PHYS:
5235 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5236 break;
5237
5238 default:
5239 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5240 }
5241 }
5242
5243 /* next */
5244 i = pPage->iNext;
5245 } while (i != NIL_PGMPOOL_IDX);
5246 return;
5247}
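
/*
 * Usage sketch (ring-3 only, hypothetical address): code that has modified a
 * guest page table behind the guest's back can force the matching shadow pages
 * to be flushed and later resynced.
 * @code
 *     pgmPoolFlushPageByGCPhys(pVM, GCPhysGstPt);
 * @endcode
 * Only the monitored kinds (guest PTs, PDs, PDPTs and PML4s) are acted on; the
 * physical-only kinds listed in the switch above are deliberately ignored.
 */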
5248
5249
5250/**
5251 * Reset CPU on hot plugging.
5252 *
5253 * @param pVM The cross context VM structure.
5254 * @param pVCpu The cross context virtual CPU structure.
5255 */
5256void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5257{
5258 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5259
5260 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5261 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5262 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5263}
5264
5265
5266/**
5267 * Flushes the entire cache.
5268 *
5269 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5270 * this and will execute this CR3 flush.
5271 *
5272 * @param pVM The cross context VM structure.
5273 */
5274void pgmR3PoolReset(PVM pVM)
5275{
5276 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5277
5278 PGM_LOCK_ASSERT_OWNER(pVM);
5279 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5280 LogFlow(("pgmR3PoolReset:\n"));
5281
5282 /*
5283 * If there are no pages in the pool, there is nothing to do.
5284 */
5285 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5286 {
5287 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5288 return;
5289 }
5290
5291 /*
5292 * Exit the shadow mode since we're going to clear everything,
5293 * including the root page.
5294 */
5295 VMCC_FOR_EACH_VMCPU(pVM)
5296 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5297 VMCC_FOR_EACH_VMCPU_END(pVM);
5298
5299
5300 /*
5301 * Nuke the free list and reinsert all pages into it.
5302 */
5303 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5304 {
5305 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5306
5307 if (pPage->fMonitored)
5308 pgmPoolMonitorFlush(pPool, pPage);
5309 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5310 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5311 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5312 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5313 pPage->GCPhys = NIL_RTGCPHYS;
5314 pPage->enmKind = PGMPOOLKIND_FREE;
5315 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5316 Assert(pPage->idx == i);
5317 pPage->iNext = i + 1;
5318 pPage->fA20Enabled = true;
5319 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5320 pPage->fSeenNonGlobal = false;
5321 pPage->fMonitored = false;
5322 pPage->fDirty = false;
5323 pPage->fCached = false;
5324 pPage->fReusedFlushPending = false;
5325 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5326 pPage->cPresent = 0;
5327 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5328 pPage->cModifications = 0;
5329 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5330 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5331 pPage->idxDirtyEntry = 0;
5332 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5333 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5334 pPage->cLastAccessHandler = 0;
5335 pPage->cLocked = 0;
5336# ifdef VBOX_STRICT
5337 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5338# endif
5339 }
5340 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5341 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5342 pPool->cUsedPages = 0;
5343
5344 /*
5345 * Zap and reinitialize the user records.
5346 */
5347 pPool->cPresent = 0;
5348 pPool->iUserFreeHead = 0;
5349 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5350 const unsigned cMaxUsers = pPool->cMaxUsers;
5351 for (unsigned i = 0; i < cMaxUsers; i++)
5352 {
5353 paUsers[i].iNext = i + 1;
5354 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5355 paUsers[i].iUserTable = 0xfffffffe;
5356 }
5357 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5358
5359 /*
5360 * Clear all the GCPhys links and rebuild the phys ext free list.
5361 */
5362 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5363 pRam;
5364 pRam = pRam->CTX_SUFF(pNext))
5365 {
5366 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5367 while (iPage-- > 0)
5368 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5369 }
5370
5371 pPool->iPhysExtFreeHead = 0;
5372 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5373 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5374 for (unsigned i = 0; i < cMaxPhysExts; i++)
5375 {
5376 paPhysExts[i].iNext = i + 1;
5377 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5378 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5379 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5380 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5381 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5382 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5383 }
5384 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5385
5386 /*
5387 * Just zap the modified list.
5388 */
5389 pPool->cModifiedPages = 0;
5390 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5391
5392 /*
5393 * Clear the GCPhys hash and the age list.
5394 */
5395 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5396 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5397 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5398 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5399
5400# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5401 /* Clear all dirty pages. */
5402 pPool->idxFreeDirtyPage = 0;
5403 pPool->cDirtyPages = 0;
5404 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5405 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5406# endif
5407
5408 /*
5409 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5410 */
5411 VMCC_FOR_EACH_VMCPU(pVM)
5412 {
5413 /*
5414 * Re-enter the shadowing mode and assert Sync CR3 FF.
5415 */
5416 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5417 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5418 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5419 }
5420 VMCC_FOR_EACH_VMCPU_END(pVM);
5421
5422 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5423}
5424
5425#endif /* IN_RING3 */
5426
5427#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5428/**
5429 * Stringifies a PGMPOOLKIND value.
5430 */
5431static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5432{
5433 switch ((PGMPOOLKIND)enmKind)
5434 {
5435 case PGMPOOLKIND_INVALID:
5436 return "PGMPOOLKIND_INVALID";
5437 case PGMPOOLKIND_FREE:
5438 return "PGMPOOLKIND_FREE";
5439 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5440 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5441 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5442 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5443 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5444 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5445 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5446 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5447 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5448 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5449 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5450 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5451 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5452 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5453 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5454 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5455 case PGMPOOLKIND_32BIT_PD:
5456 return "PGMPOOLKIND_32BIT_PD";
5457 case PGMPOOLKIND_32BIT_PD_PHYS:
5458 return "PGMPOOLKIND_32BIT_PD_PHYS";
5459 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5460 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5461 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5462 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5463 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5464 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5465 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5466 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5467 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5468 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5469 case PGMPOOLKIND_PAE_PD_PHYS:
5470 return "PGMPOOLKIND_PAE_PD_PHYS";
5471 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5472 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5473 case PGMPOOLKIND_PAE_PDPT:
5474 return "PGMPOOLKIND_PAE_PDPT";
5475 case PGMPOOLKIND_PAE_PDPT_PHYS:
5476 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5477 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5478 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5479 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5480 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5481 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5482 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5483 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5484 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5485 case PGMPOOLKIND_64BIT_PML4:
5486 return "PGMPOOLKIND_64BIT_PML4";
5487 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5488 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5489 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5490 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5491 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5492 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5493 case PGMPOOLKIND_ROOT_NESTED:
5494 return "PGMPOOLKIND_ROOT_NESTED";
5495 }
5496 return "Unknown kind!";
5497}
5498#endif /* LOG_ENABLED || VBOX_STRICT */
5499