VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 82890

Last change on this file since 82890 was 82888, checked in by vboxsync, 5 years ago

PGMAllPool: More details on corruption.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 210.2 KB
Line 
1/* $Id: PGMAllPool.cpp 82888 2020-01-28 15:23:34Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#include "PGMInternal.h"
28#include <VBox/vmm/vmcc.h>
29#include "PGMInline.h"
30#include <VBox/disopcode.h>
31#include <VBox/vmm/hm_vmx.h>
32
33#include <VBox/log.h>
34#include <VBox/err.h>
35#include <iprt/asm.h>
36#include <iprt/asm-amd64-x86.h>
37#include <iprt/string.h>
38
39
40/*********************************************************************************************************************************
41* Internal Functions *
42*********************************************************************************************************************************/
RT_C_DECLS_BEGIN
/* Prototypes that are currently compiled out. */
#if 0 /* unused */
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
#endif /* unused */
/* File-local (static) helpers, declared up front so they can be used before they are defined. */
static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
/* The kind-to-string helper is only declared when logging or strict checks are compiled in. */
#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
#endif
/* Disabled consistency checker prototype. */
#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
#endif

/* Functions declared without 'static', i.e. with external linkage. */
int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);

RT_C_DECLS_END
65
66
67#if 0 /* unused */
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86#endif /* unused */
87
88
89/**
90 * Flushes a chain of pages sharing the same access monitor.
91 *
92 * @param pPool The pool.
93 * @param pPage A page in the chain.
94 */
95void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
96{
97 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
98
99 /*
100 * Find the list head.
101 */
102 uint16_t idx = pPage->idx;
103 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
104 {
105 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
106 {
107 idx = pPage->iMonitoredPrev;
108 Assert(idx != pPage->idx);
109 pPage = &pPool->aPages[idx];
110 }
111 }
112
113 /*
114 * Iterate the list flushing each shadow page.
115 */
116 for (;;)
117 {
118 idx = pPage->iMonitoredNext;
119 Assert(idx != pPage->idx);
120 if (pPage->idx >= PGMPOOL_IDX_FIRST)
121 {
122 int rc2 = pgmPoolFlushPage(pPool, pPage);
123 AssertRC(rc2);
124 }
125 /* next */
126 if (idx == NIL_PGMPOOL_IDX)
127 break;
128 pPage = &pPool->aPages[idx];
129 }
130}
131
132
133/**
134 * Wrapper for getting the current context pointer to the entry being modified.
135 *
136 * @returns VBox status code suitable for scheduling.
137 * @param pVM The cross context VM structure.
138 * @param pvDst Destination address
139 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
140 * on the context (e.g. \#PF in R0 & RC).
141 * @param GCPhysSrc The source guest physical address.
142 * @param cb Size of data to read
143 */
144DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
145{
146#if defined(IN_RING3)
147 NOREF(pVM); NOREF(GCPhysSrc);
148 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
149 return VINF_SUCCESS;
150#else
151 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
152 NOREF(pvSrc);
153 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
154#endif
155}
156
157
158/**
159 * Process shadow entries before they are changed by the guest.
160 *
161 * For PT entries we will clear them. For PD entries, we'll simply check
162 * for mapping conflicts and set the SyncCR3 FF if found.
163 *
164 * @param pVCpu The cross context virtual CPU structure.
165 * @param pPool The pool.
166 * @param pPage The head page.
167 * @param GCPhysFault The guest physical fault address.
168 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
169 * depending on the context (e.g. \#PF in R0 & RC).
170 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
171 */
172static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
173 void const *pvAddress, unsigned cbWrite)
174{
175 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
176 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
177 PVMCC pVM = pPool->CTX_SUFF(pVM);
178 NOREF(pVCpu);
179
180 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
181 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
182
183 for (;;)
184 {
185 union
186 {
187 void *pv;
188 PX86PT pPT;
189 PPGMSHWPTPAE pPTPae;
190 PX86PD pPD;
191 PX86PDPAE pPDPae;
192 PX86PDPT pPDPT;
193 PX86PML4 pPML4;
194 } uShw;
195
196 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
197 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
198
199 uShw.pv = NULL;
200 switch (pPage->enmKind)
201 {
202 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
203 {
204 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
205 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
206 const unsigned iShw = off / sizeof(X86PTE);
207 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
208 if (uShw.pPT->a[iShw].n.u1Present)
209 {
210 X86PTE GstPte;
211
212 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
213 AssertRC(rc);
214 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
215 pgmPoolTracDerefGCPhysHint(pPool, pPage,
216 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
217 GstPte.u & X86_PTE_PG_MASK,
218 iShw);
219 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
220 }
221 break;
222 }
223
224 /* page/2 sized */
225 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
226 {
227 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
228 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
229 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
230 {
231 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
232 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
233 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
234 {
235 X86PTE GstPte;
236 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
237 AssertRC(rc);
238
239 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
240 pgmPoolTracDerefGCPhysHint(pPool, pPage,
241 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
242 GstPte.u & X86_PTE_PG_MASK,
243 iShw);
244 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
245 }
246 }
247 break;
248 }
249
250 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
251 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
252 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
253 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
254 {
255 unsigned iGst = off / sizeof(X86PDE);
256 unsigned iShwPdpt = iGst / 256;
257 unsigned iShw = (iGst % 256) * 2;
258 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
259
260 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
261 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
262 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
263 {
264 for (unsigned i = 0; i < 2; i++)
265 {
266 if (uShw.pPDPae->a[iShw+i].n.u1Present)
267 {
268 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
269 pgmPoolFree(pVM,
270 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
271 pPage->idx,
272 iShw + i);
273 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
274 }
275
276 /* paranoia / a bit assumptive. */
277 if ( (off & 3)
278 && (off & 3) + cbWrite > 4)
279 {
280 const unsigned iShw2 = iShw + 2 + i;
281 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
282 {
283 if (uShw.pPDPae->a[iShw2].n.u1Present)
284 {
285 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
286 pgmPoolFree(pVM,
287 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
288 pPage->idx,
289 iShw2);
290 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
291 }
292 }
293 }
294 }
295 }
296 break;
297 }
298
299 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
300 {
301 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
302 const unsigned iShw = off / sizeof(X86PTEPAE);
303 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
304 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
305 {
306 X86PTEPAE GstPte;
307 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
308 AssertRC(rc);
309
310 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
311 pgmPoolTracDerefGCPhysHint(pPool, pPage,
312 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
313 GstPte.u & X86_PTE_PAE_PG_MASK,
314 iShw);
315 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
316 }
317
318 /* paranoia / a bit assumptive. */
319 if ( (off & 7)
320 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
321 {
322 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
323 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
324
325 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
326 {
327 X86PTEPAE GstPte;
328 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
329 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
330 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
331 AssertRC(rc);
332 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
333 pgmPoolTracDerefGCPhysHint(pPool, pPage,
334 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
335 GstPte.u & X86_PTE_PAE_PG_MASK,
336 iShw2);
337 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
338 }
339 }
340 break;
341 }
342
343 case PGMPOOLKIND_32BIT_PD:
344 {
345 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
346 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
347
348 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
349 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
350 if (uShw.pPD->a[iShw].n.u1Present)
351 {
352 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
353 pgmPoolFree(pVM,
354 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
355 pPage->idx,
356 iShw);
357 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
358 }
359 /* paranoia / a bit assumptive. */
360 if ( (off & 3)
361 && (off & 3) + cbWrite > sizeof(X86PTE))
362 {
363 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
364 if ( iShw2 != iShw
365 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
366 {
367 if (uShw.pPD->a[iShw2].n.u1Present)
368 {
369 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
370 pgmPoolFree(pVM,
371 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
372 pPage->idx,
373 iShw2);
374 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
375 }
376 }
377 }
378#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
379 if ( uShw.pPD->a[iShw].n.u1Present
380 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
381 {
382 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
383 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
384 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
385 }
386#endif
387 break;
388 }
389
390 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
391 {
392 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
393 const unsigned iShw = off / sizeof(X86PDEPAE);
394 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
395
396 /*
397 * Causes trouble when the guest uses a PDE to refer to the whole page table level
398 * structure. (Invalidate here; faults later on when it tries to change the page
399 * table entries -> recheck; probably only applies to the RC case.)
400 */
401 if (uShw.pPDPae->a[iShw].n.u1Present)
402 {
403 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
404 pgmPoolFree(pVM,
405 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
406 pPage->idx,
407 iShw);
408 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
409 }
410
411 /* paranoia / a bit assumptive. */
412 if ( (off & 7)
413 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
414 {
415 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
416 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
417
418 if (uShw.pPDPae->a[iShw2].n.u1Present)
419 {
420 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
421 pgmPoolFree(pVM,
422 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
423 pPage->idx,
424 iShw2);
425 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
426 }
427 }
428 break;
429 }
430
431 case PGMPOOLKIND_PAE_PDPT:
432 {
433 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
434 /*
435 * Hopefully this doesn't happen very often:
436 * - touching unused parts of the page
437 * - messing with the bits of pd pointers without changing the physical address
438 */
439 /* PDPT roots are not page aligned; 32 byte only! */
440 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
441
442 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
443 const unsigned iShw = offPdpt / sizeof(X86PDPE);
444 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
445 {
446 if (uShw.pPDPT->a[iShw].n.u1Present)
447 {
448 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
449 pgmPoolFree(pVM,
450 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
451 pPage->idx,
452 iShw);
453 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
454 }
455
456 /* paranoia / a bit assumptive. */
457 if ( (offPdpt & 7)
458 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
459 {
460 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
461 if ( iShw2 != iShw
462 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
463 {
464 if (uShw.pPDPT->a[iShw2].n.u1Present)
465 {
466 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
467 pgmPoolFree(pVM,
468 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
469 pPage->idx,
470 iShw2);
471 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
472 }
473 }
474 }
475 }
476 break;
477 }
478
479 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
480 {
481 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
482 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
483 const unsigned iShw = off / sizeof(X86PDEPAE);
484 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
485 if (uShw.pPDPae->a[iShw].n.u1Present)
486 {
487 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
488 pgmPoolFree(pVM,
489 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
490 pPage->idx,
491 iShw);
492 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
493 }
494 /* paranoia / a bit assumptive. */
495 if ( (off & 7)
496 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
497 {
498 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
499 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
500
501 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
502 if (uShw.pPDPae->a[iShw2].n.u1Present)
503 {
504 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
505 pgmPoolFree(pVM,
506 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
507 pPage->idx,
508 iShw2);
509 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
510 }
511 }
512 break;
513 }
514
515 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
516 {
517 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
518 /*
519 * Hopefully this doesn't happen very often:
520 * - messing with the bits of pd pointers without changing the physical address
521 */
522 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
523 const unsigned iShw = off / sizeof(X86PDPE);
524 if (uShw.pPDPT->a[iShw].n.u1Present)
525 {
526 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
527 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
528 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
529 }
530 /* paranoia / a bit assumptive. */
531 if ( (off & 7)
532 && (off & 7) + cbWrite > sizeof(X86PDPE))
533 {
534 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
535 if (uShw.pPDPT->a[iShw2].n.u1Present)
536 {
537 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
538 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
539 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
540 }
541 }
542 break;
543 }
544
545 case PGMPOOLKIND_64BIT_PML4:
546 {
547 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
548 /*
549 * Hopefully this doesn't happen very often:
550 * - messing with the bits of pd pointers without changing the physical address
551 */
552 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
553 const unsigned iShw = off / sizeof(X86PDPE);
554 if (uShw.pPML4->a[iShw].n.u1Present)
555 {
556 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
557 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
558 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
559 }
560 /* paranoia / a bit assumptive. */
561 if ( (off & 7)
562 && (off & 7) + cbWrite > sizeof(X86PDPE))
563 {
564 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
565 if (uShw.pPML4->a[iShw2].n.u1Present)
566 {
567 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
568 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
569 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
570 }
571 }
572 break;
573 }
574
575 default:
576 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
577 }
578 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
579
580 /* next */
581 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
582 return;
583 pPage = &pPool->aPages[pPage->iMonitoredNext];
584 }
585}
586
587#ifndef IN_RING3
588
589/**
590 * Checks if a access could be a fork operation in progress.
591 *
592 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
593 *
594 * @returns true if it's likely that we're forking, otherwise false.
595 * @param pPool The pool.
596 * @param pDis The disassembled instruction.
597 * @param offFault The access offset.
598 */
599DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
600{
601 /*
602 * i386 linux is using btr to clear X86_PTE_RW.
603 * The functions involved are (2.6.16 source inspection):
604 * clear_bit
605 * ptep_set_wrprotect
606 * copy_one_pte
607 * copy_pte_range
608 * copy_pmd_range
609 * copy_pud_range
610 * copy_page_range
611 * dup_mmap
612 * dup_mm
613 * copy_mm
614 * copy_process
615 * do_fork
616 */
617 if ( pDis->pCurInstr->uOpcode == OP_BTR
618 && !(offFault & 4)
619 /** @todo Validate that the bit index is X86_PTE_RW. */
620 )
621 {
622 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
623 return true;
624 }
625 return false;
626}
627
628
629/**
630 * Determine whether the page is likely to have been reused.
631 *
632 * @returns true if we consider the page as being reused for a different purpose.
633 * @returns false if we consider it to still be a paging page.
634 * @param pVM The cross context VM structure.
635 * @param pVCpu The cross context virtual CPU structure.
636 * @param pRegFrame Trap register frame.
637 * @param pDis The disassembly info for the faulting instruction.
638 * @param pvFault The fault address.
639 * @param pPage The pool page being accessed.
640 *
641 * @remark The REP prefix check is left to the caller because of STOSD/W.
642 */
643DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
644 PPGMPOOLPAGE pPage)
645{
646 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
647 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
648 if (pPage->cLocked)
649 {
650 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been resued, because it's locked!\n", pvFault, pPage));
651 return false;
652 }
653
654 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
655 if ( HMHasPendingIrq(pVM)
656 && pRegFrame->rsp - pvFault < 32)
657 {
658 /* Fault caused by stack writes while trying to inject an interrupt event. */
659 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
660 return true;
661 }
662
663 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
664
665 /* Non-supervisor mode write means it's used for something else. */
666 if (CPUMGetGuestCPL(pVCpu) == 3)
667 return true;
668
669 switch (pDis->pCurInstr->uOpcode)
670 {
671 /* call implies the actual push of the return address faulted */
672 case OP_CALL:
673 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
674 return true;
675 case OP_PUSH:
676 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
677 return true;
678 case OP_PUSHF:
679 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
680 return true;
681 case OP_PUSHA:
682 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
683 return true;
684 case OP_FXSAVE:
685 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
686 return true;
687 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
688 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
689 return true;
690 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
691 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
692 return true;
693 case OP_MOVSWD:
694 case OP_STOSWD:
695 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
696 && pRegFrame->rcx >= 0x40
697 )
698 {
699 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
700
701 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
702 return true;
703 }
704 break;
705
706 default:
707 /*
708 * Anything having ESP on the left side means stack writes.
709 */
710 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
711 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
712 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
713 {
714 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
715 return true;
716 }
717 break;
718 }
719
720 /*
721 * Page table updates are very very unlikely to be crossing page boundraries,
722 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
723 */
724 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
725 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
726 {
727 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
728 return true;
729 }
730
731 /*
732 * Nobody does an unaligned 8 byte write to a page table, right.
733 */
734 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
735 {
736 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
737 return true;
738 }
739
740 return false;
741}
742
743
744/**
745 * Flushes the page being accessed.
746 *
747 * @returns VBox status code suitable for scheduling.
748 * @param pVM The cross context VM structure.
749 * @param pVCpu The cross context virtual CPU structure.
750 * @param pPool The pool.
751 * @param pPage The pool page (head).
752 * @param pDis The disassembly of the write instruction.
753 * @param pRegFrame The trap register frame.
754 * @param GCPhysFault The fault address as guest physical address.
755 * @param pvFault The fault address.
756 * @todo VBOXSTRICTRC
757 */
758static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
759 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
760{
761 NOREF(pVM); NOREF(GCPhysFault);
762
763 /*
764 * First, do the flushing.
765 */
766 pgmPoolMonitorChainFlush(pPool, pPage);
767
768 /*
769 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
770 * Must do this in raw mode (!); XP boot will fail otherwise.
771 */
772 int rc = VINF_SUCCESS;
773 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
774 if (rc2 == VINF_SUCCESS)
775 { /* do nothing */ }
776 else if (rc2 == VINF_EM_RESCHEDULE)
777 {
778 rc = VBOXSTRICTRC_VAL(rc2);
779# ifndef IN_RING3
780 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
781# endif
782 }
783 else if (rc2 == VERR_EM_INTERPRETER)
784 {
785 rc = VINF_EM_RAW_EMULATE_INSTR;
786 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
787 }
788 else if (RT_FAILURE_NP(rc2))
789 rc = VBOXSTRICTRC_VAL(rc2);
790 else
791 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
792
793 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
794 return rc;
795}
796
797
798/**
799 * Handles the STOSD write accesses.
800 *
801 * @returns VBox status code suitable for scheduling.
802 * @param pVM The cross context VM structure.
803 * @param pPool The pool.
804 * @param pPage The pool page (head).
805 * @param pDis The disassembly of the write instruction.
806 * @param pRegFrame The trap register frame.
807 * @param GCPhysFault The fault address as guest physical address.
808 * @param pvFault The fault address.
809 */
810DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
811 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
812{
813 unsigned uIncrement = pDis->Param1.cb;
814 NOREF(pVM);
815
816 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
817 Assert(pRegFrame->rcx <= 0x20);
818
819# ifdef VBOX_STRICT
820 if (pDis->uOpMode == DISCPUMODE_32BIT)
821 Assert(uIncrement == 4);
822 else
823 Assert(uIncrement == 8);
824# endif
825
826 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
827
828 /*
829 * Increment the modification counter and insert it into the list
830 * of modified pages the first time.
831 */
832 if (!pPage->cModifications++)
833 pgmPoolMonitorModifiedInsert(pPool, pPage);
834
835 /*
836 * Execute REP STOSD.
837 *
838 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
839 * write situation, meaning that it's safe to write here.
840 */
841 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
842 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
843 while (pRegFrame->rcx)
844 {
845# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
846 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
847 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
848 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
849# else
850 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
851# endif
852 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
853 pu32 += uIncrement;
854 GCPhysFault += uIncrement;
855 pRegFrame->rdi += uIncrement;
856 pRegFrame->rcx--;
857 }
858 pRegFrame->rip += pDis->cbInstr;
859
860 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
861 return VINF_SUCCESS;
862}
863
864
865/**
866 * Handles the simple write accesses.
867 *
868 * @returns VBox status code suitable for scheduling.
869 * @param pVM The cross context VM structure.
870 * @param pVCpu The cross context virtual CPU structure.
871 * @param pPool The pool.
872 * @param pPage The pool page (head).
873 * @param pDis The disassembly of the write instruction.
874 * @param pRegFrame The trap register frame.
875 * @param GCPhysFault The fault address as guest physical address.
876 * @param pvFault The fault address.
877 * @param pfReused Reused state (in/out)
878 */
879DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
880 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
881{
882 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
883 NOREF(pVM);
884 NOREF(pfReused); /* initialized by caller */
885
886 /*
887 * Increment the modification counter and insert it into the list
888 * of modified pages the first time.
889 */
890 if (!pPage->cModifications++)
891 pgmPoolMonitorModifiedInsert(pPool, pPage);
892
893 /*
894 * Clear all the pages. ASSUMES that pvFault is readable.
895 */
896# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
897 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
898# endif
899
900 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
901 if (cbWrite <= 8)
902 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
903 else if (cbWrite <= 16)
904 {
905 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
906 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
907 }
908 else
909 {
910 Assert(cbWrite <= 32);
911 for (uint32_t off = 0; off < cbWrite; off += 8)
912 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
913 }
914
915# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
916 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
917# endif
918
919 /*
920 * Interpret the instruction.
921 */
922 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
923 if (RT_SUCCESS(rc))
924 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
925 else if (rc == VERR_EM_INTERPRETER)
926 {
927 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
928 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
929 rc = VINF_EM_RAW_EMULATE_INSTR;
930 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
931 }
932
933# if 0 /* experimental code */
934 if (rc == VINF_SUCCESS)
935 {
936 switch (pPage->enmKind)
937 {
938 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
939 {
940 X86PTEPAE GstPte;
941 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
942 AssertRC(rc);
943
944 /* Check the new value written by the guest. If present and with a bogus physical address, then
945 * it's fairly safe to assume the guest is reusing the PT.
946 */
947 if (GstPte.n.u1Present)
948 {
949 RTHCPHYS HCPhys = -1;
950 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
951 if (rc != VINF_SUCCESS)
952 {
953 *pfReused = true;
954 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
955 }
956 }
957 break;
958 }
959 }
960 }
961# endif
962
963 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
964 return VBOXSTRICTRC_VAL(rc);
965}
966
967
968/**
969 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
970 * \#PF access handler callback for page table pages.}
971 *
 * Overview of the flow, all of it under the PGM lock:
 *  -# Bail out with VINF_SUCCESS if the pool page no longer covers the
 *     faulting guest page, or (dirty PT optimization) is already dirty.
 *  -# Disassemble the faulting instruction; a failure status is returned as is.
 *  -# Double the modification counter when the access looks like a sequential
 *     continuation of the previously recorded one.
 *  -# While below the modification limit (or locked) and neither reused nor
 *     forking: handle simple instructions and small REP STOS runs in place.
 *  -# Otherwise try to mark a PAE page table dirty (ring-0, dirty PT
 *     optimization only), and if that is not possible flush the page.
 *
972 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
973 */
974DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
975 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
976{
977 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
978 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
979 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
980 unsigned cMaxModifications;
    /* Set when the sequential-write heuristic below pushes the counter over the limit; it vetoes the dirty page path. */
981 bool fForcedFlush = false;
982 NOREF(uErrorCode);
983
984 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
985
986 pgmLock(pVM);
987 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
988 {
989 /* Pool page changed while we were waiting for the lock; ignore. */
990 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
991 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
992 pgmUnlock(pVM);
993 return VINF_SUCCESS;
994 }
995# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
996 if (pPage->fDirty)
997 {
998 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
999 pgmUnlock(pVM);
1000 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1001 }
1002# endif
1003
1004# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1005 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1006 {
1007 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1008 void *pvGst;
1009 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1010 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1011 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1012 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1013 }
1014# endif
1015
1016 /*
1017 * Disassemble the faulting instruction.
1018 */
1019 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1020 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1021 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1022 {
    /* Only the two not-present statuses are expected here; whatever was returned is handed back to the caller. */
1023 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1024 pgmUnlock(pVM);
1025 return rc;
1026 }
1027
1028 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1029
1030 /*
1031 * We should ALWAYS have the list head as user parameter. This
1032 * is because we use that page to record the changes.
1033 */
1034 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1035
1036# ifdef IN_RING0
1037 /* Maximum nr of modifications depends on the page type. */
1038 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1039 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1040 cMaxModifications = 4;
1041 else
1042 cMaxModifications = 24;
1043# else
1044 cMaxModifications = 48;
1045# endif
1046
1047 /*
1048 * Incremental page table updates should weigh more than random ones.
1049 * (Only applies when started from offset 0)
1050 */
    /* The test below requires: RIP within 0x40 bytes of the recorded one, the fault address directly following
     * the recorded fault address (by the operand size), and no other pool access handler call on this VCPU in between. */
1051 pVCpu->pgm.s.cPoolAccessHandler++;
1052 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1053 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1054 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1055 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1056 {
1057 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1058 Assert(pPage->cModifications < 32000);
1059 pPage->cModifications = pPage->cModifications * 2;
1060 pPage->GCPtrLastAccessHandlerFault = pvFault;
1061 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1062 if (pPage->cModifications >= cMaxModifications)
1063 {
1064 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1065 fForcedFlush = true;
1066 }
1067 }
1068
1069 if (pPage->cModifications >= cMaxModifications)
1070 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1071
1072 /*
1073 * Check if it's worth dealing with.
1074 */
    /* Note that fReused is assigned inside the condition; when the first operand is false it keeps its initial value. */
1075 bool fReused = false;
1076 bool fNotReusedNotForking = false;
1077 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1078 || pgmPoolIsPageLocked(pPage)
1079 )
1080 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1081 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1082 {
1083 /*
1084 * Simple instructions, no REP prefix.
1085 */
1086 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1087 {
1088 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1089 if (fReused)
1090 goto flushPage;
1091
1092 /* A mov instruction to change the first page table entry will be remembered so we can detect
1093 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1094 */
1095 if ( rc == VINF_SUCCESS
1096 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1097 && pDis->pCurInstr->uOpcode == OP_MOV
1098 && (pvFault & PAGE_OFFSET_MASK) == 0)
1099 {
1100 pPage->GCPtrLastAccessHandlerFault = pvFault;
1101 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1102 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1103 /* Make sure we don't kick out a page too quickly. */
1104 if (pPage->cModifications > 8)
1105 pPage->cModifications = 2;
1106 }
1107 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1108 {
1109 /* ignore the 2nd write to this page table entry. */
1110 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1111 }
1112 else
1113 {
    /* Any other access resets the recorded fault address and RIP, so the sequential-write test above cannot match until a new first-entry MOV is recorded. */
1114 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1115 pPage->GCPtrLastAccessHandlerRip = 0;
1116 }
1117
1118 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1119 pgmUnlock(pVM);
1120 return rc;
1121 }
1122
1123 /*
1124 * Windows is frequently doing small memset() operations (netio test 4k+).
1125 * We have to deal with these or we'll kill the cache and performance.
1126 */
    /* Accepted only with DF clear, operand and address size matching the CPU mode, at most 0x20 elements,
     * an aligned destination, the run staying within the page, and a store value of 0 or 0x80. */
1127 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1128 && !pRegFrame->eflags.Bits.u1DF
1129 && pDis->uOpMode == pDis->uCpuMode
1130 && pDis->uAddrMode == pDis->uCpuMode)
1131 {
1132 bool fValidStosd = false;
1133
1134 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1135 && pDis->fPrefix == DISPREFIX_REP
1136 && pRegFrame->ecx <= 0x20
1137 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1138 && !((uintptr_t)pvFault & 3)
1139 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1140 )
1141 {
1142 fValidStosd = true;
1143 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1144 }
1145 else
1146 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1147 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1148 && pRegFrame->rcx <= 0x20
1149 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1150 && !((uintptr_t)pvFault & 7)
1151 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1152 )
1153 {
1154 fValidStosd = true;
1155 }
1156
1157 if (fValidStosd)
1158 {
1159 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1160 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1161 pgmUnlock(pVM);
1162 return rc;
1163 }
1164 }
1165
1166 /* REP prefix, don't bother. */
1167 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1168 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1169 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
    /* Both the reuse and the forking check were negative to get here; remember that so the dirty page path below does not repeat them. */
1170 fNotReusedNotForking = true;
1171 }
1172
1173# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1174 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1175 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1176 */
1177 if ( pPage->cModifications >= cMaxModifications
1178 && !fForcedFlush
1179 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1180 && ( fNotReusedNotForking
1181 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1182 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1183 )
1184 )
1185 {
1186 Assert(!pgmPoolIsPageLocked(pPage));
1187 Assert(pPage->fDirty == false);
1188
1189 /* Flush any monitored duplicates as we will disable write protection. */
1190 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1191 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1192 {
1193 PPGMPOOLPAGE pPageHead = pPage;
1194
1195 /* Find the monitor head. */
1196 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1197 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1198
    /* Walk the chain from the head, flushing every page except pPage itself. The next index is
     * fetched before the flush call and the walk ends when it is NIL_PGMPOOL_IDX. */
1199 while (pPageHead)
1200 {
1201 unsigned idxNext = pPageHead->iMonitoredNext;
1202
1203 if (pPageHead != pPage)
1204 {
1205 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1206 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1207 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1208 AssertRC(rc2);
1209 }
1210
1211 if (idxNext == NIL_PGMPOOL_IDX)
1212 break;
1213
1214 pPageHead = &pPool->aPages[idxNext];
1215 }
1216 }
1217
1218 /* The flushing above might fail for locked pages, so double check. */
1219 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1220 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1221 {
1222 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1223
1224 /* Temporarily allow write access to the page table again. */
1225 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1226 if (rc == VINF_SUCCESS)
1227 {
1228 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1229 AssertMsg(rc == VINF_SUCCESS
1230 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1231 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1232 || rc == VERR_PAGE_NOT_PRESENT,
1233 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1234# ifdef VBOX_STRICT
1235 pPage->GCPtrDirtyFault = pvFault;
1236# endif
1237
1238 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1239 pgmUnlock(pVM);
1240 return rc;
1241 }
    /* If the temp-off call did not return VINF_SUCCESS we fall through to the flush below. */
1242 }
1243 }
1244# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1245
    /* This counter is only bumped when falling through to here; the goto flushPage jump above lands after it. */
1246 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1247flushPage:
1248 /*
1249 * Not worth it, so flush it.
1250 *
1251 * If we considered it to be reused, don't go back to ring-3
1252 * to emulate failed instructions since we usually cannot
1253 * interpret then. This may be a bit risky, in which case
1254 * the reuse detection must be fixed.
1255 */
1256 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1257 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1258 && fReused)
1259 {
1260 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1261 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1262 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1263 }
1264 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1265 pgmUnlock(pVM);
1266 return rc;
1267}
1268
1269#endif /* !IN_RING3 */
1270
1271/**
1272 * @callback_method_impl{FNPGMPHYSHANDLER,
1273 * Access handler for shadowed page table pages.}
1274 *
1275 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1276 */
1277PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1278pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1279 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1280{
1281 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1282 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1283 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1284 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1285 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1286
1287 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1288
1289 pgmLock(pVM);
1290
1291#ifdef VBOX_WITH_STATISTICS
1292 /*
1293 * Collect stats on the access.
1294 */
1295 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1296 if (cbBuf <= 16 && cbBuf > 0)
1297 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1298 else if (cbBuf >= 17 && cbBuf < 32)
1299 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1300 else if (cbBuf >= 32 && cbBuf < 64)
1301 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1302 else if (cbBuf >= 64)
1303 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1304
1305 uint8_t cbAlign;
1306 switch (pPage->enmKind)
1307 {
1308 default:
1309 cbAlign = 7;
1310 break;
1311 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1312 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1313 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1314 case PGMPOOLKIND_32BIT_PD:
1315 case PGMPOOLKIND_32BIT_PD_PHYS:
1316 cbAlign = 3;
1317 break;
1318 }
1319 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1320 if ((uint8_t)GCPhys & cbAlign)
1321 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1322#endif
1323
1324 /*
1325 * Make sure the pool page wasn't modified by a different CPU.
1326 */
1327 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1328 {
1329 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1330
1331 /* The max modification count before flushing depends on the context and page type. */
1332#ifdef IN_RING3
1333 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1334#else
1335 uint16_t cMaxModifications;
1336 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1337 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1338 cMaxModifications = 4;
1339 else
1340 cMaxModifications = 24;
1341#endif
1342
1343 /*
1344 * We don't have to be very sophisticated about this since there are relativly few calls here.
1345 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1346 */
1347 if ( ( pPage->cModifications < cMaxModifications
1348 || pgmPoolIsPageLocked(pPage) )
1349 && enmOrigin != PGMACCESSORIGIN_DEVICE
1350 && cbBuf <= 16)
1351 {
1352 /* Clear the shadow entry. */
1353 if (!pPage->cModifications++)
1354 pgmPoolMonitorModifiedInsert(pPool, pPage);
1355
1356 if (cbBuf <= 8)
1357 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1358 else
1359 {
1360 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1361 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1362 }
1363 }
1364 else
1365 pgmPoolMonitorChainFlush(pPool, pPage);
1366
1367 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1368 }
1369 else
1370 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1371 pgmUnlock(pVM);
1372 return VINF_PGM_HANDLER_DO_DEFAULT;
1373}
1374
1375
1376#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1377
1378# if defined(VBOX_STRICT) && !defined(IN_RING3)
1379
1380/**
1381 * Check references to guest physical memory in a PAE / PAE page table.
1382 *
1383 * @param pPool The pool.
1384 * @param pPage The page.
1385 * @param pShwPT The shadow page table (mapping of the page).
1386 * @param pGstPT The guest page table.
1387 */
1388static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1389{
1390 unsigned cErrors = 0;
1391 int LastRc = -1; /* initialized to shut up gcc */
1392 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1393 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1394 PVMCC pVM = pPool->CTX_SUFF(pVM);
1395
1396# ifdef VBOX_STRICT
1397 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1398 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1399# endif
1400 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1401 {
1402 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1403 {
1404 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1405 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1406 if ( rc != VINF_SUCCESS
1407 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1408 {
1409 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1410 LastPTE = i;
1411 LastRc = rc;
1412 LastHCPhys = HCPhys;
1413 cErrors++;
1414
1415 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1416 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1417 AssertRC(rc);
1418
1419 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1420 {
1421 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1422
1423 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1424 {
1425 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1426
1427 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1428 {
1429 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1430 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1431 {
1432 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1433 }
1434 }
1435
1436 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1437 }
1438 }
1439 }
1440 }
1441 }
1442 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1443}
1444
1445
1446/**
1447 * Check references to guest physical memory in a PAE / 32-bit page table.
1448 *
1449 * @param pPool The pool.
1450 * @param pPage The page.
1451 * @param pShwPT The shadow page table (mapping of the page).
1452 * @param pGstPT The guest page table.
1453 */
1454static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1455{
1456 unsigned cErrors = 0;
1457 int LastRc = -1; /* initialized to shut up gcc */
1458 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1459 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1460 PVMCC pVM = pPool->CTX_SUFF(pVM);
1461
1462# ifdef VBOX_STRICT
1463 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1464 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1465# endif
1466 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1467 {
1468 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1469 {
1470 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1471 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1472 if ( rc != VINF_SUCCESS
1473 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1474 {
1475 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1476 LastPTE = i;
1477 LastRc = rc;
1478 LastHCPhys = HCPhys;
1479 cErrors++;
1480
1481 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1482 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1483 AssertRC(rc);
1484
1485 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1486 {
1487 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1488
1489 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1490 {
1491 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1492
1493 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1494 {
1495 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1496 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1497 {
1498 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1499 }
1500 }
1501
1502 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1503 }
1504 }
1505 }
1506 }
1507 }
1508 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1509}
1510
1511# endif /* VBOX_STRICT && !IN_RING3 */
1512
1513/**
1514 * Clear references to guest physical memory in a PAE / PAE page table.
1515 *
1516 * @returns nr of changed PTEs
1517 * @param pPool The pool.
1518 * @param pPage The page.
1519 * @param pShwPT The shadow page table (mapping of the page).
1520 * @param pGstPT The guest page table.
1521 * @param pOldGstPT The old cached guest page table.
1522 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1523 * @param pfFlush Flush reused page table (out)
1524 */
1525DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1526 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1527{
1528 unsigned cChanged = 0;
1529
1530# ifdef VBOX_STRICT
1531 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1532 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1533# endif
1534 *pfFlush = false;
1535
1536 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1537 {
1538 /* Check the new value written by the guest. If present and with a bogus physical address, then
1539 * it's fairly safe to assume the guest is reusing the PT.
1540 */
1541 if ( fAllowRemoval
1542 && pGstPT->a[i].n.u1Present)
1543 {
1544 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1545 {
1546 *pfFlush = true;
1547 return ++cChanged;
1548 }
1549 }
1550 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1551 {
1552 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1553 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1554 {
1555# ifdef VBOX_STRICT
1556 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1557 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1558 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1559# endif
1560 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1561 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1562 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1563 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1564
1565 if ( uHostAttr == uGuestAttr
1566 && fHostRW <= fGuestRW)
1567 continue;
1568 }
1569 cChanged++;
1570 /* Something was changed, so flush it. */
1571 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1572 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1573 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1574 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1575 }
1576 }
1577 return cChanged;
1578}
1579
1580
1581/**
1582 * Clear references to guest physical memory in a PAE / PAE page table.
1583 *
1584 * @returns nr of changed PTEs
1585 * @param pPool The pool.
1586 * @param pPage The page.
1587 * @param pShwPT The shadow page table (mapping of the page).
1588 * @param pGstPT The guest page table.
1589 * @param pOldGstPT The old cached guest page table.
1590 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1591 * @param pfFlush Flush reused page table (out)
1592 */
1593DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1594 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1595{
1596 unsigned cChanged = 0;
1597
1598# ifdef VBOX_STRICT
1599 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1600 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1601# endif
1602 *pfFlush = false;
1603
1604 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1605 {
1606 /* Check the new value written by the guest. If present and with a bogus physical address, then
1607 * it's fairly safe to assume the guest is reusing the PT.
1608 */
1609 if ( fAllowRemoval
1610 && pGstPT->a[i].n.u1Present)
1611 {
1612 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1613 {
1614 *pfFlush = true;
1615 return ++cChanged;
1616 }
1617 }
1618 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1619 {
1620 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1621 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1622 {
1623# ifdef VBOX_STRICT
1624 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1625 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1626 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1627# endif
1628 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1629 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1630 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1631 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1632
1633 if ( uHostAttr == uGuestAttr
1634 && fHostRW <= fGuestRW)
1635 continue;
1636 }
1637 cChanged++;
1638 /* Something was changed, so flush it. */
1639 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1640 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1641 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1642 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1643 }
1644 }
1645 return cChanged;
1646}
1647
1648
1649/**
1650 * Flush a dirty page
1651 *
 * Does nothing when the slot is empty.  Otherwise the physical access handler
 * for the page is reset, the dirty flag is cleared, the shadow page table is
 * compared against the current guest page table and the copy cached in
 * aDirtyPages[idxSlot] (changed entries are cleared by the comparison
 * helpers), and the slot is released.  When the comparison reports a reused
 * page table the pool page itself is flushed.
 *
1652 * @param pVM The cross context VM structure.
1653 * @param pPool The pool.
1654 * @param idxSlot Dirty array slot index
1655 * @param fAllowRemoval Allow a reused page table to be removed
 *                       (defaults to false).
1656 */
1657static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1658{
1659 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1660
1661 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1662 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
    /* Empty slot, nothing to flush. */
1663 if (idxPage == NIL_PGMPOOL_IDX)
1664 return;
1665
1666 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1667 Assert(pPage->idx == idxPage);
1668 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1669
1670 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1671 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1672
1673# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1674 PVMCPU pVCpu = VMMGetCpu(pVM);
1675 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1676# endif
1677
1678 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1679 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1680 Assert(rc == VINF_SUCCESS);
1681 pPage->fDirty = false;
1682
1683# ifdef VBOX_STRICT
    /* Strict builds: the address recorded in GCPtrDirtyFault must no longer be a writable mapping of this page (or not be mapped at all). */
1684 uint64_t fFlags = 0;
1685 RTHCPHYS HCPhys;
1686 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1687 AssertMsg( ( rc == VINF_SUCCESS
1688 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1689 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1690 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1691 || rc == VERR_PAGE_NOT_PRESENT,
1692 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1693# endif
1694
1695 /* Flush those PTEs that have changed. */
1696 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1697 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1698 void *pvGst;
1699 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
    /* fFlush is written by whichever comparison helper is called below (both start with *pfFlush = false). */
1700 bool fFlush;
1701 unsigned cChanges;
1702
    /* Any kind other than PAE-for-PAE is handled as PAE-for-32-bit here. */
1703 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1704 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1705 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1706 else
1707 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1708 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1709
1710 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1711 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1712 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1713 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1714
1715 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1716 Assert(pPage->cModifications);
1717 if (cChanges < 4)
1718 pPage->cModifications = 1; /* must use > 0 here */
1719 else
1720 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1721
1722 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
    /* If the dirty array was completely full, the slot being released becomes the recorded free slot. */
1723 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1724 pPool->idxFreeDirtyPage = idxSlot;
1725
1726 pPool->cDirtyPages--;
1727 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1728 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1729 if (fFlush)
1730 {
    /* The helpers only set the flush flag when fAllowRemoval is set, hence the assertion. The return value of pgmPoolFlushPage is not checked here. */
1731 Assert(fAllowRemoval);
1732 Log(("Flush reused page table!\n"));
1733 pgmPoolFlushPage(pPool, pPage);
1734 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1735 }
1736 else
1737 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1738
1739# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1740 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1741# endif
1742}
1743
1744
1745# ifndef IN_RING3
1746/**
1747 * Add a new dirty page
1748 *
1749 * @param pVM The cross context VM structure.
1750 * @param pPool The pool.
1751 * @param pPage The page.
1752 */
1753void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1754{
1755 PGM_LOCK_ASSERT_OWNER(pVM);
1756 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1757 Assert(!pPage->fDirty);
1758
1759 unsigned idxFree = pPool->idxFreeDirtyPage;
1760 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1761 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1762
1763 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1764 {
1765 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1766 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1767 }
1768 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1769 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1770
1771 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1772
1773 /*
1774 * Make a copy of the guest page table as we require valid GCPhys addresses
1775 * when removing references to physical pages.
1776 * (The HCPhys linear lookup is *extremely* expensive!)
1777 */
1778 void *pvGst;
1779 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1780 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1781# ifdef VBOX_STRICT
1782 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1783 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1784 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1785 else
1786 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1787 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1788# endif
1789 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1790
1791 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1792 pPage->fDirty = true;
1793 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1794 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1795 pPool->cDirtyPages++;
1796
1797 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1798 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1799 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1800 {
1801 unsigned i;
1802 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1803 {
1804 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1805 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1806 {
1807 pPool->idxFreeDirtyPage = idxFree;
1808 break;
1809 }
1810 }
1811 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1812 }
1813
1814 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1815
1816 /*
1817 * Clear all references to this shadow table. See @bugref{7298}.
1818 */
1819 pgmPoolTrackClearPageUsers(pPool, pPage);
1820}
1821# endif /* !IN_RING3 */
1822
1823
1824/**
1825 * Check if the specified page is dirty (not write monitored)
1826 *
1827 * @return dirty or not
1828 * @param pVM The cross context VM structure.
1829 * @param GCPhys Guest physical address
1830 */
1831bool pgmPoolIsDirtyPageSlow(PVM pVM, RTGCPHYS GCPhys)
1832{
1833 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1834 PGM_LOCK_ASSERT_OWNER(pVM);
1835 if (!pPool->cDirtyPages)
1836 return false;
1837
1838 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1839
1840 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1841 {
1842 unsigned idxPage = pPool->aidxDirtyPages[i];
1843 if (idxPage != NIL_PGMPOOL_IDX)
1844 {
1845 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1846 if (pPage->GCPhys == GCPhys)
1847 return true;
1848 }
1849 }
1850 return false;
1851}
1852
1853
1854/**
1855 * Reset all dirty pages by reinstating page monitoring.
1856 *
1857 * @param pVM The cross context VM structure.
1858 */
1859void pgmPoolResetDirtyPages(PVMCC pVM)
1860{
1861 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1862 PGM_LOCK_ASSERT_OWNER(pVM);
1863 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1864
1865 if (!pPool->cDirtyPages)
1866 return;
1867
1868 Log(("pgmPoolResetDirtyPages\n"));
1869 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1870 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1871
1872 pPool->idxFreeDirtyPage = 0;
1873 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1874 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1875 {
1876 unsigned i;
1877 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1878 {
1879 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1880 {
1881 pPool->idxFreeDirtyPage = i;
1882 break;
1883 }
1884 }
1885 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1886 }
1887
1888 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1889 return;
1890}
1891
1892
1893/**
1894 * Invalidate the PT entry for the specified page
1895 *
1896 * @param pVM The cross context VM structure.
1897 * @param GCPtrPage Guest page to invalidate
1898 */
1899void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1900{
1901 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1902 PGM_LOCK_ASSERT_OWNER(pVM);
1903 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1904
1905 if (!pPool->cDirtyPages)
1906 return;
1907
1908 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
1909 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1910 {
1911 /** @todo What was intended here??? This looks incomplete... */
1912 }
1913}
1914
1915
1916/**
1917 * Reset all dirty pages by reinstating page monitoring.
1918 *
1919 * @param pVM The cross context VM structure.
1920 * @param GCPhysPT Physical address of the page table
1921 */
1922void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
1923{
1924 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1925 PGM_LOCK_ASSERT_OWNER(pVM);
1926 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1927 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1928
1929 if (!pPool->cDirtyPages)
1930 return;
1931
1932 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1933
1934 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1935 {
1936 unsigned idxPage = pPool->aidxDirtyPages[i];
1937 if (idxPage != NIL_PGMPOOL_IDX)
1938 {
1939 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1940 if (pPage->GCPhys == GCPhysPT)
1941 {
1942 idxDirtyPage = i;
1943 break;
1944 }
1945 }
1946 }
1947
1948 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1949 {
1950 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1951 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1952 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1953 {
1954 unsigned i;
1955 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1956 {
1957 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1958 {
1959 pPool->idxFreeDirtyPage = i;
1960 break;
1961 }
1962 }
1963 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1964 }
1965 }
1966}
1967
1968#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1969
1970/**
1971 * Inserts a page into the GCPhys hash table.
1972 *
1973 * @param pPool The pool.
1974 * @param pPage The page.
1975 */
1976DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1977{
1978 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1979 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1980 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1981 pPage->iNext = pPool->aiHash[iHash];
1982 pPool->aiHash[iHash] = pPage->idx;
1983}
1984
1985
1986/**
1987 * Removes a page from the GCPhys hash table.
1988 *
1989 * @param pPool The pool.
1990 * @param pPage The page.
1991 */
1992DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1993{
1994 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1995 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1996 if (pPool->aiHash[iHash] == pPage->idx)
1997 pPool->aiHash[iHash] = pPage->iNext;
1998 else
1999 {
2000 uint16_t iPrev = pPool->aiHash[iHash];
2001 for (;;)
2002 {
2003 const int16_t i = pPool->aPages[iPrev].iNext;
2004 if (i == pPage->idx)
2005 {
2006 pPool->aPages[iPrev].iNext = pPage->iNext;
2007 break;
2008 }
2009 if (i == NIL_PGMPOOL_IDX)
2010 {
2011 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2012 break;
2013 }
2014 iPrev = i;
2015 }
2016 }
2017 pPage->iNext = NIL_PGMPOOL_IDX;
2018}
2019
2020
2021/**
2022 * Frees up one cache page.
2023 *
2024 * @returns VBox status code.
2025 * @retval VINF_SUCCESS on success.
2026 * @param pPool The pool.
2027 * @param iUser The user index.
2028 */
2029static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2030{
2031 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2032 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
2033 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2034
2035 /*
2036 * Select one page from the tail of the age list.
2037 */
2038 PPGMPOOLPAGE pPage;
2039 for (unsigned iLoop = 0; ; iLoop++)
2040 {
2041 uint16_t iToFree = pPool->iAgeTail;
2042 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2043 iToFree = pPool->aPages[iToFree].iAgePrev;
2044/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2045 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2046 {
2047 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2048 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2049 {
2050 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2051 continue;
2052 iToFree = i;
2053 break;
2054 }
2055 }
2056*/
2057 Assert(iToFree != iUser);
2058 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX, ("iToFree=%#x (%#x)\n%.1024Rhxd\n", iToFree, pPool->iAgeTail, pPool));
2059 pPage = &pPool->aPages[iToFree];
2060
2061 /*
2062 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2063 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2064 */
2065 if ( !pgmPoolIsPageLocked(pPage)
2066 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2067 break;
2068 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2069 pgmPoolCacheUsed(pPool, pPage);
2070 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2071 }
2072
2073 /*
2074 * Found a usable page, flush it and return.
2075 */
2076 int rc = pgmPoolFlushPage(pPool, pPage);
2077 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2078 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2079 if (rc == VINF_SUCCESS)
2080 PGM_INVL_ALL_VCPU_TLBS(pVM);
2081 return rc;
2082}
2083
2084
2085/**
2086 * Checks if a kind mismatch is really a page being reused
2087 * or if it's just normal remappings.
2088 *
2089 * @returns true if reused and the cached page (enmKind1) should be flushed
2090 * @returns false if not reused.
2091 * @param enmKind1 The kind of the cached page.
2092 * @param enmKind2 The kind of the requested page.
2093 */
2094static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2095{
2096 switch (enmKind1)
2097 {
2098 /*
2099 * Never reuse them. There is no remapping in non-paging mode.
2100 */
2101 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2102 case PGMPOOLKIND_32BIT_PD_PHYS:
2103 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2104 case PGMPOOLKIND_PAE_PD_PHYS:
2105 case PGMPOOLKIND_PAE_PDPT_PHYS:
2106 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2107 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2108 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2109 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2110 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2111 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2112 return false;
2113
2114 /*
2115 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2116 */
2117 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2118 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2119 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2120 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2121 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2122 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2123 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2124 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2125 case PGMPOOLKIND_32BIT_PD:
2126 case PGMPOOLKIND_PAE_PDPT:
2127 switch (enmKind2)
2128 {
2129 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2130 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2131 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2132 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2133 case PGMPOOLKIND_64BIT_PML4:
2134 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2135 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2136 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2137 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2138 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2139 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2140 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2141 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2142 return true;
2143 default:
2144 return false;
2145 }
2146
2147 /*
2148 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2149 */
2150 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2151 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2152 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2153 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2154 case PGMPOOLKIND_64BIT_PML4:
2155 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2156 switch (enmKind2)
2157 {
2158 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2159 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2160 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2161 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2162 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2163 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2164 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2165 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2166 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2167 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2168 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2169 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2170 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2171 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2172 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2173 return true;
2174 default:
2175 return false;
2176 }
2177
2178 /*
2179 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2180 */
2181 case PGMPOOLKIND_ROOT_NESTED:
2182 return false;
2183
2184 default:
2185 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2186 }
2187}
2188
2189
2190/**
2191 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2192 *
2193 * @returns VBox status code.
2194 * @retval VINF_PGM_CACHED_PAGE on success.
2195 * @retval VERR_FILE_NOT_FOUND if not found.
2196 * @param pPool The pool.
2197 * @param GCPhys The GC physical address of the page we're gonna shadow.
2198 * @param enmKind The kind of mapping.
2199 * @param enmAccess Access type for the mapping (only relevant for big pages)
2200 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2201 * @param iUser The shadow page pool index of the user table. This is
2202 * NIL_PGMPOOL_IDX for root pages.
2203 * @param iUserTable The index into the user table (shadowed). Ignored if
2204 * root page
2205 * @param ppPage Where to store the pointer to the page.
2206 */
2207static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2208 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2209{
2210 /*
2211 * Look up the GCPhys in the hash.
2212 */
2213 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2214 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2215 if (i != NIL_PGMPOOL_IDX)
2216 {
2217 do
2218 {
2219 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2220 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2221 if (pPage->GCPhys == GCPhys)
2222 {
2223 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2224 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2225 && pPage->fA20Enabled == fA20Enabled)
2226 {
2227 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2228 * doesn't flush it in case there are no more free use records.
2229 */
2230 pgmPoolCacheUsed(pPool, pPage);
2231
2232 int rc = VINF_SUCCESS;
2233 if (iUser != NIL_PGMPOOL_IDX)
2234 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2235 if (RT_SUCCESS(rc))
2236 {
2237 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2238 *ppPage = pPage;
2239 if (pPage->cModifications)
2240 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2241 STAM_COUNTER_INC(&pPool->StatCacheHits);
2242 return VINF_PGM_CACHED_PAGE;
2243 }
2244 return rc;
2245 }
2246
2247 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2248 {
2249 /*
2250 * The kind is different. In some cases we should now flush the page
2251 * as it has been reused, but in most cases this is normal remapping
2252 * of PDs as PT or big pages using the GCPhys field in a slightly
2253 * different way than the other kinds.
2254 */
2255 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2256 {
2257 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2258 pgmPoolFlushPage(pPool, pPage);
2259 break;
2260 }
2261 }
2262 }
2263
2264 /* next */
2265 i = pPage->iNext;
2266 } while (i != NIL_PGMPOOL_IDX);
2267 }
2268
2269 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2270 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2271 return VERR_FILE_NOT_FOUND;
2272}
2273
2274
2275/**
2276 * Inserts a page into the cache.
2277 *
2278 * @param pPool The pool.
2279 * @param pPage The cached page.
2280 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2281 */
2282static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2283{
2284 /*
2285 * Insert into the GCPhys hash if the page is fit for that.
2286 */
2287 Assert(!pPage->fCached);
2288 if (fCanBeCached)
2289 {
2290 pPage->fCached = true;
2291 pgmPoolHashInsert(pPool, pPage);
2292 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2293 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2294 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2295 }
2296 else
2297 {
2298 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2299 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2300 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2301 }
2302
2303 /*
2304 * Insert at the head of the age list.
2305 */
2306 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2307 pPage->iAgeNext = pPool->iAgeHead;
2308 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2309 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2310 else
2311 pPool->iAgeTail = pPage->idx;
2312 pPool->iAgeHead = pPage->idx;
2313}
2314
2315
2316/**
2317 * Flushes a cached page.
2318 *
2319 * @param pPool The pool.
2320 * @param pPage The cached page.
2321 */
2322static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2323{
2324 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2325
2326 /*
2327 * Remove the page from the hash.
2328 */
2329 if (pPage->fCached)
2330 {
2331 pPage->fCached = false;
2332 pgmPoolHashRemove(pPool, pPage);
2333 }
2334 else
2335 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2336
2337 /*
2338 * Remove it from the age list.
2339 */
2340 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2341 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2342 else
2343 pPool->iAgeTail = pPage->iAgePrev;
2344 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2345 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2346 else
2347 pPool->iAgeHead = pPage->iAgeNext;
2348 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2349 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2350}
2351
2352
2353/**
2354 * Looks for pages sharing the monitor.
2355 *
2356 * @returns Pointer to the head page.
2357 * @returns NULL if not found.
2358 * @param pPool The Pool
2359 * @param pNewPage The page which is going to be monitored.
2360 */
2361static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2362{
2363 /*
2364 * Look up the GCPhys in the hash.
2365 */
2366 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2367 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2368 if (i == NIL_PGMPOOL_IDX)
2369 return NULL;
2370 do
2371 {
2372 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2373 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2374 && pPage != pNewPage)
2375 {
2376 switch (pPage->enmKind)
2377 {
2378 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2379 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2380 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2381 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2382 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2383 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2384 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2385 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2386 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2387 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2388 case PGMPOOLKIND_64BIT_PML4:
2389 case PGMPOOLKIND_32BIT_PD:
2390 case PGMPOOLKIND_PAE_PDPT:
2391 {
2392 /* find the head */
2393 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2394 {
2395 Assert(pPage->iMonitoredPrev != pPage->idx);
2396 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2397 }
2398 return pPage;
2399 }
2400
2401 /* ignore, no monitoring. */
2402 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2403 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2404 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2405 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2406 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2407 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2408 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2409 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2410 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2411 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2412 case PGMPOOLKIND_ROOT_NESTED:
2413 case PGMPOOLKIND_PAE_PD_PHYS:
2414 case PGMPOOLKIND_PAE_PDPT_PHYS:
2415 case PGMPOOLKIND_32BIT_PD_PHYS:
2416 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2417 break;
2418 default:
2419 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2420 }
2421 }
2422
2423 /* next */
2424 i = pPage->iNext;
2425 } while (i != NIL_PGMPOOL_IDX);
2426 return NULL;
2427}
2428
2429
2430/**
2431 * Enabled write monitoring of a guest page.
2432 *
2433 * @returns VBox status code.
2434 * @retval VINF_SUCCESS on success.
2435 * @param pPool The pool.
2436 * @param pPage The cached page.
2437 */
2438static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2439{
2440 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2441
2442 /*
2443 * Filter out the relevant kinds.
2444 */
2445 switch (pPage->enmKind)
2446 {
2447 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2448 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2449 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2450 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2451 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2452 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2453 case PGMPOOLKIND_64BIT_PML4:
2454 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2455 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2456 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2457 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2458 case PGMPOOLKIND_32BIT_PD:
2459 case PGMPOOLKIND_PAE_PDPT:
2460 break;
2461
2462 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2463 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2464 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2465 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2466 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2467 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2468 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2469 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2470 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2471 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2472 case PGMPOOLKIND_ROOT_NESTED:
2473 /* Nothing to monitor here. */
2474 return VINF_SUCCESS;
2475
2476 case PGMPOOLKIND_32BIT_PD_PHYS:
2477 case PGMPOOLKIND_PAE_PDPT_PHYS:
2478 case PGMPOOLKIND_PAE_PD_PHYS:
2479 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2480 /* Nothing to monitor here. */
2481 return VINF_SUCCESS;
2482 default:
2483 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2484 }
2485
2486 /*
2487 * Install handler.
2488 */
2489 int rc;
2490 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2491 if (pPageHead)
2492 {
2493 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2494 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2495
2496#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2497 if (pPageHead->fDirty)
2498 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2499#endif
2500
2501 pPage->iMonitoredPrev = pPageHead->idx;
2502 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2503 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2504 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2505 pPageHead->iMonitoredNext = pPage->idx;
2506 rc = VINF_SUCCESS;
2507 }
2508 else
2509 {
2510 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2511 PVMCC pVM = pPool->CTX_SUFF(pVM);
2512 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2513 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2514 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2515 NIL_RTR3PTR /*pszDesc*/);
2516 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2517 * the heap size should suffice. */
2518 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2519 PVMCPU pVCpu = VMMGetCpu(pVM);
2520 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2521 }
2522 pPage->fMonitored = true;
2523 return rc;
2524}
2525
2526
2527/**
2528 * Disables write monitoring of a guest page.
2529 *
2530 * @returns VBox status code.
2531 * @retval VINF_SUCCESS on success.
2532 * @param pPool The pool.
2533 * @param pPage The cached page.
2534 */
2535static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2536{
2537 /*
2538 * Filter out the relevant kinds.
2539 */
2540 switch (pPage->enmKind)
2541 {
2542 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2543 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2544 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2545 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2546 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2547 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2548 case PGMPOOLKIND_64BIT_PML4:
2549 case PGMPOOLKIND_32BIT_PD:
2550 case PGMPOOLKIND_PAE_PDPT:
2551 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2552 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2553 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2554 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2555 break;
2556
2557 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2558 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2559 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2560 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2561 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2562 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2563 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2564 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2565 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2566 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2567 case PGMPOOLKIND_ROOT_NESTED:
2568 case PGMPOOLKIND_PAE_PD_PHYS:
2569 case PGMPOOLKIND_PAE_PDPT_PHYS:
2570 case PGMPOOLKIND_32BIT_PD_PHYS:
2571 /* Nothing to monitor here. */
2572 Assert(!pPage->fMonitored);
2573 return VINF_SUCCESS;
2574
2575 default:
2576 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2577 }
2578 Assert(pPage->fMonitored);
2579
2580 /*
2581 * Remove the page from the monitored list or uninstall it if last.
2582 */
2583 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2584 int rc;
2585 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2586 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2587 {
2588 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2589 {
2590 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2591 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2592 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2593 MMHyperCCToR3(pVM, pNewHead), MMHyperCCToR0(pVM, pNewHead));
2594
2595 AssertFatalRCSuccess(rc);
2596 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2597 }
2598 else
2599 {
2600 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2601 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2602 {
2603 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2604 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2605 }
2606 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2607 rc = VINF_SUCCESS;
2608 }
2609 }
2610 else
2611 {
2612 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2613 AssertFatalRC(rc);
2614 PVMCPU pVCpu = VMMGetCpu(pVM);
2615 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2616 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2617 }
2618 pPage->fMonitored = false;
2619
2620 /*
2621 * Remove it from the list of modified pages (if in it).
2622 */
2623 pgmPoolMonitorModifiedRemove(pPool, pPage);
2624
2625 return rc;
2626}
2627
2628
2629/**
2630 * Inserts the page into the list of modified pages.
2631 *
2632 * @param pPool The pool.
2633 * @param pPage The page.
2634 */
2635void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2636{
2637 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2638 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2639 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2640 && pPool->iModifiedHead != pPage->idx,
2641 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2642 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2643 pPool->iModifiedHead, pPool->cModifiedPages));
2644
2645 pPage->iModifiedNext = pPool->iModifiedHead;
2646 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2647 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2648 pPool->iModifiedHead = pPage->idx;
2649 pPool->cModifiedPages++;
2650#ifdef VBOX_WITH_STATISTICS
2651 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2652 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2653#endif
2654}
2655
2656
2657/**
2658 * Removes the page from the list of modified pages and resets the
2659 * modification counter.
2660 *
2661 * @param pPool The pool.
2662 * @param pPage The page which is believed to be in the list of modified pages.
2663 */
2664static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2665{
2666 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2667 if (pPool->iModifiedHead == pPage->idx)
2668 {
2669 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2670 pPool->iModifiedHead = pPage->iModifiedNext;
2671 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2672 {
2673 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2674 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2675 }
2676 pPool->cModifiedPages--;
2677 }
2678 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2679 {
2680 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2681 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2682 {
2683 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2684 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2685 }
2686 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2687 pPool->cModifiedPages--;
2688 }
2689 else
2690 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2691 pPage->cModifications = 0;
2692}
2693
2694
2695/**
2696 * Zaps the list of modified pages, resetting their modification counters in the process.
2697 *
2698 * @param pVM The cross context VM structure.
2699 */
2700static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2701{
2702 pgmLock(pVM);
2703 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2704 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2705
2706 unsigned cPages = 0; NOREF(cPages);
2707
2708#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2709 pgmPoolResetDirtyPages(pVM);
2710#endif
2711
2712 uint16_t idx = pPool->iModifiedHead;
2713 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2714 while (idx != NIL_PGMPOOL_IDX)
2715 {
2716 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2717 idx = pPage->iModifiedNext;
2718 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2719 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2720 pPage->cModifications = 0;
2721 Assert(++cPages);
2722 }
2723 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2724 pPool->cModifiedPages = 0;
2725 pgmUnlock(pVM);
2726}
2727
2728
2729/**
2730 * Handle SyncCR3 pool tasks
2731 *
2732 * @returns VBox status code.
2733 * @retval VINF_SUCCESS if successfully added.
2734 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
2735 * @param pVCpu The cross context virtual CPU structure.
2736 * @remark Should only be used when monitoring is available, thus placed in
2737 * the PGMPOOL_WITH_MONITORING \#ifdef.
2738 */
2739int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2740{
2741 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2742 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2743
2744 /*
2745 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2746 * Occasionally we will have to clear all the shadow page tables because we wanted
2747 * to monitor a page which was mapped by too many shadowed page tables. This operation
2748 * sometimes referred to as a 'lightweight flush'.
2749 */
2750# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2751 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2752 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2753# else /* !IN_RING3 */
2754 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2755 {
2756 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2757 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2758
2759 /* Make sure all other VCPUs return to ring 3. */
2760 if (pVM->cCpus > 1)
2761 {
2762 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2763 PGM_INVL_ALL_VCPU_TLBS(pVM);
2764 }
2765 return VINF_PGM_SYNC_CR3;
2766 }
2767# endif /* !IN_RING3 */
2768 else
2769 {
2770 pgmPoolMonitorModifiedClearAll(pVM);
2771
2772 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2773 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2774 {
2775 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2776 return pgmPoolSyncCR3(pVCpu);
2777 }
2778 }
2779 return VINF_SUCCESS;
2780}
2781
2782
2783/**
2784 * Frees up at least one user entry.
2785 *
2786 * @returns VBox status code.
2787 * @retval VINF_SUCCESS if successfully added.
2788 *
2789 * @param pPool The pool.
2790 * @param iUser The user index.
2791 */
2792static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2793{
2794 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2795 /*
2796 * Just free cached pages in a braindead fashion.
2797 */
2798 /** @todo walk the age list backwards and free the first with usage. */
2799 int rc = VINF_SUCCESS;
2800 do
2801 {
2802 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2803 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2804 rc = rc2;
2805 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2806 return rc;
2807}
2808
2809
2810/**
2811 * Inserts a page into the cache.
2812 *
2813 * This will create user node for the page, insert it into the GCPhys
2814 * hash, and insert it into the age list.
2815 *
2816 * @returns VBox status code.
2817 * @retval VINF_SUCCESS if successfully added.
2818 *
2819 * @param pPool The pool.
2820 * @param pPage The cached page.
2821 * @param GCPhys The GC physical address of the page we're gonna shadow.
2822 * @param iUser The user index.
2823 * @param iUserTable The user table index.
2824 */
2825DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2826{
2827 int rc = VINF_SUCCESS;
2828 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2829
2830 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2831
2832 if (iUser != NIL_PGMPOOL_IDX)
2833 {
2834#ifdef VBOX_STRICT
2835 /*
2836 * Check that the entry doesn't already exists.
2837 */
2838 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2839 {
2840 uint16_t i = pPage->iUserHead;
2841 do
2842 {
2843 Assert(i < pPool->cMaxUsers);
2844 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2845 i = paUsers[i].iNext;
2846 } while (i != NIL_PGMPOOL_USER_INDEX);
2847 }
2848#endif
2849
2850 /*
2851 * Find free a user node.
2852 */
2853 uint16_t i = pPool->iUserFreeHead;
2854 if (i == NIL_PGMPOOL_USER_INDEX)
2855 {
2856 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2857 if (RT_FAILURE(rc))
2858 return rc;
2859 i = pPool->iUserFreeHead;
2860 }
2861
2862 /*
2863 * Unlink the user node from the free list,
2864 * initialize and insert it into the user list.
2865 */
2866 pPool->iUserFreeHead = paUsers[i].iNext;
2867 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2868 paUsers[i].iUser = iUser;
2869 paUsers[i].iUserTable = iUserTable;
2870 pPage->iUserHead = i;
2871 }
2872 else
2873 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2874
2875
2876 /*
2877 * Insert into cache and enable monitoring of the guest page if enabled.
2878 *
2879 * Until we implement caching of all levels, including the CR3 one, we'll
2880 * have to make sure we don't try monitor & cache any recursive reuse of
2881 * a monitored CR3 page. Because all windows versions are doing this we'll
2882 * have to be able to do combined access monitoring, CR3 + PT and
2883 * PD + PT (guest PAE).
2884 *
2885 * Update:
2886 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2887 */
2888 const bool fCanBeMonitored = true;
2889 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2890 if (fCanBeMonitored)
2891 {
2892 rc = pgmPoolMonitorInsert(pPool, pPage);
2893 AssertRC(rc);
2894 }
2895 return rc;
2896}
2897
2898
2899/**
2900 * Adds a user reference to a page.
2901 *
2902 * This will move the page to the head of the
2903 *
2904 * @returns VBox status code.
2905 * @retval VINF_SUCCESS if successfully added.
2906 *
2907 * @param pPool The pool.
2908 * @param pPage The cached page.
2909 * @param iUser The user index.
2910 * @param iUserTable The user table.
2911 */
2912static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2913{
2914 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2915 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2916 Assert(iUser != NIL_PGMPOOL_IDX);
2917
2918# ifdef VBOX_STRICT
2919 /*
2920 * Check that the entry doesn't already exists. We only allow multiple
2921 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2922 */
2923 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2924 {
2925 uint16_t i = pPage->iUserHead;
2926 do
2927 {
2928 Assert(i < pPool->cMaxUsers);
2929 /** @todo this assertion looks odd... Shouldn't it be && here? */
2930 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2931 i = paUsers[i].iNext;
2932 } while (i != NIL_PGMPOOL_USER_INDEX);
2933 }
2934# endif
2935
2936 /*
2937 * Allocate a user node.
2938 */
2939 uint16_t i = pPool->iUserFreeHead;
2940 if (i == NIL_PGMPOOL_USER_INDEX)
2941 {
2942 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2943 if (RT_FAILURE(rc))
2944 return rc;
2945 i = pPool->iUserFreeHead;
2946 }
2947 pPool->iUserFreeHead = paUsers[i].iNext;
2948
2949 /*
2950 * Initialize the user node and insert it.
2951 */
2952 paUsers[i].iNext = pPage->iUserHead;
2953 paUsers[i].iUser = iUser;
2954 paUsers[i].iUserTable = iUserTable;
2955 pPage->iUserHead = i;
2956
2957# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2958 if (pPage->fDirty)
2959 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2960# endif
2961
2962 /*
2963 * Tell the cache to update its replacement stats for this page.
2964 */
2965 pgmPoolCacheUsed(pPool, pPage);
2966 return VINF_SUCCESS;
2967}
2968
2969
2970/**
2971 * Frees a user record associated with a page.
2972 *
2973 * This does not clear the entry in the user table, it simply replaces the
2974 * user record to the chain of free records.
2975 *
2976 * @param pPool The pool.
2977 * @param pPage The shadow page.
2978 * @param iUser The shadow page pool index of the user table.
2979 * @param iUserTable The index into the user table (shadowed).
2980 *
2981 * @remarks Don't call this for root pages.
2982 */
2983static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2984{
2985 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2986 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2987 Assert(iUser != NIL_PGMPOOL_IDX);
2988
2989 /*
2990 * Unlink and free the specified user entry.
2991 */
2992
2993 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2994 uint16_t i = pPage->iUserHead;
2995 if ( i != NIL_PGMPOOL_USER_INDEX
2996 && paUsers[i].iUser == iUser
2997 && paUsers[i].iUserTable == iUserTable)
2998 {
2999 pPage->iUserHead = paUsers[i].iNext;
3000
3001 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3002 paUsers[i].iNext = pPool->iUserFreeHead;
3003 pPool->iUserFreeHead = i;
3004 return;
3005 }
3006
3007 /* General: Linear search. */
3008 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3009 while (i != NIL_PGMPOOL_USER_INDEX)
3010 {
3011 if ( paUsers[i].iUser == iUser
3012 && paUsers[i].iUserTable == iUserTable)
3013 {
3014 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3015 paUsers[iPrev].iNext = paUsers[i].iNext;
3016 else
3017 pPage->iUserHead = paUsers[i].iNext;
3018
3019 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3020 paUsers[i].iNext = pPool->iUserFreeHead;
3021 pPool->iUserFreeHead = i;
3022 return;
3023 }
3024 iPrev = i;
3025 i = paUsers[i].iNext;
3026 }
3027
3028 /* Fatal: didn't find it */
3029 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3030 iUser, iUserTable, pPage->GCPhys));
3031}
3032
3033
#if 0 /* unused */
/**
 * Gets the size of one entry in a shadow table of the given kind.
 *
 * @returns The size of the entry in bytes. That is, 4 or 8.
 *          An unknown kind triggers a fatal assertion.
 *
 * @param   enmKind     The kind of page.
 */
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* The 32-bit kinds: 4 byte entries. */
        case PGMPOOLKIND_32BIT_PD:
        case PGMPOOLKIND_32BIT_PD_PHYS:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
            return 4;

        /* All the other known kinds: 8 byte entries. */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_PAE_PDPT:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PML4:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            return 8;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
#endif /* unused */
3084
#if 0 /* unused */
/**
 * Gets the size of one entry in the guest table shadowed by a page of the
 * given kind.
 *
 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
 * @returns For the kinds returning 0 an assertion is raised first.
 *          An unknown kind triggers a fatal assertion.
 *
 * @param   enmKind     The kind of page.
 */
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* Kinds shadowing 32-bit guest structures. */
        case PGMPOOLKIND_32BIT_PD:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
            return 4;

        /* Kinds shadowing PAE / 64-bit guest structures. */
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PDPT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4:
            return 8;

        /* The physical / nested kinds: asserts and returns 0. */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_32BIT_PD_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /** @todo can we return 0? (nobody is calling this...) */
            AssertFailed();
            return 0;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
#endif /* unused */
3139
3140
3141/**
3142 * Checks one shadow page table entry for a mapping of a physical page.
3143 *
3144 * @returns true / false indicating removal of all relevant PTEs
3145 *
3146 * @param pVM The cross context VM structure.
3147 * @param pPhysPage The guest page in question.
3148 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3149 * @param iShw The shadow page table.
3150 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3151 */
3152static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3153{
3154 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3155 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3156 bool fRet = false;
3157
3158 /*
3159 * Assert sanity.
3160 */
3161 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3162 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3163 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3164
3165 /*
3166 * Then, clear the actual mappings to the page in the shadow PT.
3167 */
3168 switch (pPage->enmKind)
3169 {
3170 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3171 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3172 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3173 {
3174 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3175 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3176 uint32_t u32AndMask = 0;
3177 uint32_t u32OrMask = 0;
3178
3179 if (!fFlushPTEs)
3180 {
3181 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3182 {
3183 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3184 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3185 u32OrMask = X86_PTE_RW;
3186 u32AndMask = UINT32_MAX;
3187 fRet = true;
3188 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3189 break;
3190
3191 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3192 u32OrMask = 0;
3193 u32AndMask = ~X86_PTE_RW;
3194 fRet = true;
3195 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3196 break;
3197 default:
3198 /* (shouldn't be here, will assert below) */
3199 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3200 break;
3201 }
3202 }
3203 else
3204 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3205
3206 /* Update the counter if we're removing references. */
3207 if (!u32AndMask)
3208 {
3209 Assert(pPage->cPresent);
3210 Assert(pPool->cPresent);
3211 pPage->cPresent--;
3212 pPool->cPresent--;
3213 }
3214
3215 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3216 {
3217 X86PTE Pte;
3218
3219 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3220 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3221 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3222 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3223
3224 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3225 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3226 return fRet;
3227 }
3228#ifdef LOG_ENABLED
3229 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3230 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3231 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3232 {
3233 Log(("i=%d cFound=%d\n", i, ++cFound));
3234 }
3235#endif
3236 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3237 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3238 break;
3239 }
3240
3241 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3242 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3243 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3244 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3245 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3246 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3247 {
3248 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3249 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3250 uint64_t u64OrMask = 0;
3251 uint64_t u64AndMask = 0;
3252
3253 if (!fFlushPTEs)
3254 {
3255 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3256 {
3257 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3258 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3259 u64OrMask = X86_PTE_RW;
3260 u64AndMask = UINT64_MAX;
3261 fRet = true;
3262 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3263 break;
3264
3265 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3266 u64OrMask = 0;
3267 u64AndMask = ~(uint64_t)X86_PTE_RW;
3268 fRet = true;
3269 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3270 break;
3271
3272 default:
3273 /* (shouldn't be here, will assert below) */
3274 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3275 break;
3276 }
3277 }
3278 else
3279 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3280
3281 /* Update the counter if we're removing references. */
3282 if (!u64AndMask)
3283 {
3284 Assert(pPage->cPresent);
3285 Assert(pPool->cPresent);
3286 pPage->cPresent--;
3287 pPool->cPresent--;
3288 }
3289
3290 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3291 {
3292 X86PTEPAE Pte;
3293
3294 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3295 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3296 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3297 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3298
3299 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3300 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3301 return fRet;
3302 }
3303#ifdef LOG_ENABLED
3304 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3305 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3306 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3307 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3308 Log(("i=%d cFound=%d\n", i, ++cFound));
3309#endif
3310 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3311 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3312 break;
3313 }
3314
3315#ifdef PGM_WITH_LARGE_PAGES
3316 /* Large page case only. */
3317 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3318 {
3319 Assert(pVM->pgm.s.fNestedPaging);
3320
3321 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3322 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3323
3324 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3325 {
3326 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3327 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3328 pPD->a[iPte].u = 0;
3329 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3330
3331 /* Update the counter as we're removing references. */
3332 Assert(pPage->cPresent);
3333 Assert(pPool->cPresent);
3334 pPage->cPresent--;
3335 pPool->cPresent--;
3336
3337 return fRet;
3338 }
3339# ifdef LOG_ENABLED
3340 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3341 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3342 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3343 Log(("i=%d cFound=%d\n", i, ++cFound));
3344# endif
3345 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3346 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3347 break;
3348 }
3349
3350 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3351 case PGMPOOLKIND_PAE_PD_PHYS:
3352 {
3353 Assert(pVM->pgm.s.fNestedPaging);
3354
3355 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3356 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3357
3358 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3359 {
3360 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3361 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3362 pPD->a[iPte].u = 0;
3363 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3364
3365 /* Update the counter as we're removing references. */
3366 Assert(pPage->cPresent);
3367 Assert(pPool->cPresent);
3368 pPage->cPresent--;
3369 pPool->cPresent--;
3370 return fRet;
3371 }
3372# ifdef LOG_ENABLED
3373 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3374 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3375 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3376 Log(("i=%d cFound=%d\n", i, ++cFound));
3377# endif
3378 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3379 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3380 break;
3381 }
3382#endif /* PGM_WITH_LARGE_PAGES */
3383
3384 default:
3385 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3386 }
3387
3388 /* not reached. */
3389#ifndef _MSC_VER
3390 return fRet;
3391#endif
3392}
3393
3394
3395/**
3396 * Scans one shadow page table for mappings of a physical page.
3397 *
3398 * @param pVM The cross context VM structure.
3399 * @param pPhysPage The guest page in question.
3400 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3401 * @param iShw The shadow page table.
3402 */
3403static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3404{
3405 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3406
3407 /* We should only come here with when there's only one reference to this physical page. */
3408 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3409
3410 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3411 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3412 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3413 if (!fKeptPTEs)
3414 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3415 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3416}
3417
3418
3419/**
3420 * Flushes a list of shadow page tables mapping the same physical page.
3421 *
3422 * @param pVM The cross context VM structure.
3423 * @param pPhysPage The guest page in question.
3424 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3425 * @param iPhysExt The physical cross reference extent list to flush.
3426 */
3427static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3428{
3429 PGM_LOCK_ASSERT_OWNER(pVM);
3430 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3431 bool fKeepList = false;
3432
3433 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3434 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3435
3436 const uint16_t iPhysExtStart = iPhysExt;
3437 PPGMPOOLPHYSEXT pPhysExt;
3438 do
3439 {
3440 Assert(iPhysExt < pPool->cMaxPhysExts);
3441 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3442 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3443 {
3444 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3445 {
3446 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3447 if (!fKeptPTEs)
3448 {
3449 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3450 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3451 }
3452 else
3453 fKeepList = true;
3454 }
3455 }
3456 /* next */
3457 iPhysExt = pPhysExt->iNext;
3458 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3459
3460 if (!fKeepList)
3461 {
3462 /* insert the list into the free list and clear the ram range entry. */
3463 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3464 pPool->iPhysExtFreeHead = iPhysExtStart;
3465 /* Invalidate the tracking data. */
3466 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3467 }
3468
3469 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3470}
3471
3472
3473/**
3474 * Flushes all shadow page table mappings of the given guest page.
3475 *
3476 * This is typically called when the host page backing the guest one has been
3477 * replaced or when the page protection was changed due to a guest access
3478 * caught by the monitoring.
3479 *
3480 * @returns VBox status code.
3481 * @retval VINF_SUCCESS if all references has been successfully cleared.
3482 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3483 * pool cleaning. FF and sync flags are set.
3484 *
3485 * @param pVM The cross context VM structure.
3486 * @param GCPhysPage GC physical address of the page in question
3487 * @param pPhysPage The guest page in question.
3488 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3489 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3490 * flushed, it is NOT touched if this isn't necessary.
3491 * The caller MUST initialized this to @a false.
3492 */
3493int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3494{
3495 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3496 pgmLock(pVM);
3497 int rc = VINF_SUCCESS;
3498
3499#ifdef PGM_WITH_LARGE_PAGES
3500 /* Is this page part of a large page? */
3501 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3502 {
3503 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3504 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3505
3506 /* Fetch the large page base. */
3507 PPGMPAGE pLargePage;
3508 if (GCPhysBase != GCPhysPage)
3509 {
3510 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3511 AssertFatal(pLargePage);
3512 }
3513 else
3514 pLargePage = pPhysPage;
3515
3516 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3517
3518 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3519 {
3520 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3521 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3522 pVM->pgm.s.cLargePagesDisabled++;
3523
3524 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3525 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3526
3527 *pfFlushTLBs = true;
3528 pgmUnlock(pVM);
3529 return rc;
3530 }
3531 }
3532#else
3533 NOREF(GCPhysPage);
3534#endif /* PGM_WITH_LARGE_PAGES */
3535
3536 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3537 if (u16)
3538 {
3539 /*
3540 * The zero page is currently screwing up the tracking and we'll
3541 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3542 * is defined, zero pages won't normally be mapped. Some kind of solution
3543 * will be needed for this problem of course, but it will have to wait...
3544 */
3545 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3546 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3547 rc = VINF_PGM_GCPHYS_ALIASED;
3548 else
3549 {
3550# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 /** @todo we can drop this now. */
3551 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3552 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3553 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3554# endif
3555
3556 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3557 {
3558 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3559 pgmPoolTrackFlushGCPhysPT(pVM,
3560 pPhysPage,
3561 fFlushPTEs,
3562 PGMPOOL_TD_GET_IDX(u16));
3563 }
3564 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3565 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3566 else
3567 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3568 *pfFlushTLBs = true;
3569
3570# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3571 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3572# endif
3573 }
3574 }
3575
3576 if (rc == VINF_PGM_GCPHYS_ALIASED)
3577 {
3578 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3579 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3580 rc = VINF_PGM_SYNC_CR3;
3581 }
3582 pgmUnlock(pVM);
3583 return rc;
3584}
3585
3586
3587/**
3588 * Scans all shadow page tables for mappings of a physical page.
3589 *
3590 * This may be slow, but it's most likely more efficient than cleaning
3591 * out the entire page pool / cache.
3592 *
3593 * @returns VBox status code.
3594 * @retval VINF_SUCCESS if all references has been successfully cleared.
3595 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3596 * a page pool cleaning.
3597 *
3598 * @param pVM The cross context VM structure.
3599 * @param pPhysPage The guest page in question.
3600 */
3601int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3602{
3603 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3604 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3605 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3606 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3607
3608 /*
3609 * There is a limit to what makes sense.
3610 */
3611 if ( pPool->cPresent > 1024
3612 && pVM->cCpus == 1)
3613 {
3614 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3615 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3616 return VINF_PGM_GCPHYS_ALIASED;
3617 }
3618
3619 /*
3620 * Iterate all the pages until we've encountered all that in use.
3621 * This is simple but not quite optimal solution.
3622 */
3623 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3624 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3625 unsigned cLeft = pPool->cUsedPages;
3626 unsigned iPage = pPool->cCurPages;
3627 while (--iPage >= PGMPOOL_IDX_FIRST)
3628 {
3629 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3630 if ( pPage->GCPhys != NIL_RTGCPHYS
3631 && pPage->cPresent)
3632 {
3633 switch (pPage->enmKind)
3634 {
3635 /*
3636 * We only care about shadow page tables.
3637 */
3638 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3639 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3640 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3641 {
3642 unsigned cPresent = pPage->cPresent;
3643 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3644 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3645 if (pPT->a[i].n.u1Present)
3646 {
3647 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3648 {
3649 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3650 pPT->a[i].u = 0;
3651
3652 /* Update the counter as we're removing references. */
3653 Assert(pPage->cPresent);
3654 Assert(pPool->cPresent);
3655 pPage->cPresent--;
3656 pPool->cPresent--;
3657 }
3658 if (!--cPresent)
3659 break;
3660 }
3661 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3662 break;
3663 }
3664
3665 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3666 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3667 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3668 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3669 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3670 {
3671 unsigned cPresent = pPage->cPresent;
3672 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3673 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3674 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3675 {
3676 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3677 {
3678 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3679 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3680
3681 /* Update the counter as we're removing references. */
3682 Assert(pPage->cPresent);
3683 Assert(pPool->cPresent);
3684 pPage->cPresent--;
3685 pPool->cPresent--;
3686 }
3687 if (!--cPresent)
3688 break;
3689 }
3690 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3691 break;
3692 }
3693
3694 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3695 {
3696 unsigned cPresent = pPage->cPresent;
3697 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3698 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3699 if (pPT->a[i].n.u1Present)
3700 {
3701 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3702 {
3703 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3704 pPT->a[i].u = 0;
3705
3706 /* Update the counter as we're removing references. */
3707 Assert(pPage->cPresent);
3708 Assert(pPool->cPresent);
3709 pPage->cPresent--;
3710 pPool->cPresent--;
3711 }
3712 if (!--cPresent)
3713 break;
3714 }
3715 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3716 break;
3717 }
3718 }
3719
3720 if (!--cLeft)
3721 break;
3722 }
3723 }
3724
3725 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3726 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3727
3728 /*
3729 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3730 */
3731 if (pPool->cPresent > 1024)
3732 {
3733 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3734 return VINF_PGM_GCPHYS_ALIASED;
3735 }
3736
3737 return VINF_SUCCESS;
3738}
3739
3740
3741/**
3742 * Clears the user entry in a user table.
3743 *
3744 * This is used to remove all references to a page when flushing it.
3745 */
3746static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3747{
3748 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3749 Assert(pUser->iUser < pPool->cCurPages);
3750 uint32_t iUserTable = pUser->iUserTable;
3751
3752 /*
3753 * Map the user page. Ignore references made by fictitious pages.
3754 */
3755 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3756 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3757 union
3758 {
3759 uint64_t *pau64;
3760 uint32_t *pau32;
3761 } u;
3762 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3763 {
3764 Assert(!pUserPage->pvPageR3);
3765 return;
3766 }
3767 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3768
3769
3770 /* Safety precaution in case we change the paging for other modes too in the future. */
3771 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3772
3773#ifdef VBOX_STRICT
3774 /*
3775 * Some sanity checks.
3776 */
3777 switch (pUserPage->enmKind)
3778 {
3779 case PGMPOOLKIND_32BIT_PD:
3780 case PGMPOOLKIND_32BIT_PD_PHYS:
3781 Assert(iUserTable < X86_PG_ENTRIES);
3782 break;
3783 case PGMPOOLKIND_PAE_PDPT:
3784 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3785 case PGMPOOLKIND_PAE_PDPT_PHYS:
3786 Assert(iUserTable < 4);
3787 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3788 break;
3789 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3790 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3791 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3792 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3793 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3794 case PGMPOOLKIND_PAE_PD_PHYS:
3795 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3796 break;
3797 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3798 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3799 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3800 break;
3801 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3802 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3803 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3804 break;
3805 case PGMPOOLKIND_64BIT_PML4:
3806 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3807 /* GCPhys >> PAGE_SHIFT is the index here */
3808 break;
3809 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3810 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3811 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3812 break;
3813
3814 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3815 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3816 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3817 break;
3818
3819 case PGMPOOLKIND_ROOT_NESTED:
3820 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3821 break;
3822
3823 default:
3824 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3825 break;
3826 }
3827#endif /* VBOX_STRICT */
3828
3829 /*
3830 * Clear the entry in the user page.
3831 */
3832 switch (pUserPage->enmKind)
3833 {
3834 /* 32-bit entries */
3835 case PGMPOOLKIND_32BIT_PD:
3836 case PGMPOOLKIND_32BIT_PD_PHYS:
3837 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3838 break;
3839
3840 /* 64-bit entries */
3841 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3842 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3843 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3844 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3845 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3846 case PGMPOOLKIND_PAE_PD_PHYS:
3847 case PGMPOOLKIND_PAE_PDPT_PHYS:
3848 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3849 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3850 case PGMPOOLKIND_64BIT_PML4:
3851 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3852 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3853 case PGMPOOLKIND_PAE_PDPT:
3854 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3855 case PGMPOOLKIND_ROOT_NESTED:
3856 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3857 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3858 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3859 break;
3860
3861 default:
3862 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3863 }
3864 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3865}
3866
3867
3868/**
3869 * Clears all users of a page.
3870 */
3871static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3872{
3873 /*
3874 * Free all the user records.
3875 */
3876 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3877
3878 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3879 uint16_t i = pPage->iUserHead;
3880 while (i != NIL_PGMPOOL_USER_INDEX)
3881 {
3882 /* Clear enter in user table. */
3883 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3884
3885 /* Free it. */
3886 const uint16_t iNext = paUsers[i].iNext;
3887 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3888 paUsers[i].iNext = pPool->iUserFreeHead;
3889 pPool->iUserFreeHead = i;
3890
3891 /* Next. */
3892 i = iNext;
3893 }
3894 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3895}
3896
3897
3898/**
3899 * Allocates a new physical cross reference extent.
3900 *
3901 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3902 * @param pVM The cross context VM structure.
3903 * @param piPhysExt Where to store the phys ext index.
3904 */
3905PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3906{
3907 PGM_LOCK_ASSERT_OWNER(pVM);
3908 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3909 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3910 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3911 {
3912 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3913 return NULL;
3914 }
3915 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3916 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3917 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3918 *piPhysExt = iPhysExt;
3919 return pPhysExt;
3920}
3921
3922
3923/**
3924 * Frees a physical cross reference extent.
3925 *
3926 * @param pVM The cross context VM structure.
3927 * @param iPhysExt The extent to free.
3928 */
3929void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3930{
3931 PGM_LOCK_ASSERT_OWNER(pVM);
3932 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3933 Assert(iPhysExt < pPool->cMaxPhysExts);
3934 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3935 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3936 {
3937 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3938 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3939 }
3940 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3941 pPool->iPhysExtFreeHead = iPhysExt;
3942}
3943
3944
3945/**
3946 * Frees a physical cross reference extent.
3947 *
3948 * @param pVM The cross context VM structure.
3949 * @param iPhysExt The extent to free.
3950 */
3951void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3952{
3953 PGM_LOCK_ASSERT_OWNER(pVM);
3954 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3955
3956 const uint16_t iPhysExtStart = iPhysExt;
3957 PPGMPOOLPHYSEXT pPhysExt;
3958 do
3959 {
3960 Assert(iPhysExt < pPool->cMaxPhysExts);
3961 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3962 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3963 {
3964 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3965 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3966 }
3967
3968 /* next */
3969 iPhysExt = pPhysExt->iNext;
3970 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3971
3972 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3973 pPool->iPhysExtFreeHead = iPhysExtStart;
3974}
3975
3976
3977/**
3978 * Insert a reference into a list of physical cross reference extents.
3979 *
3980 * @returns The new tracking data for PGMPAGE.
3981 *
3982 * @param pVM The cross context VM structure.
3983 * @param iPhysExt The physical extent index of the list head.
3984 * @param iShwPT The shadow page table index.
3985 * @param iPte Page table entry
3986 *
3987 */
3988static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3989{
3990 PGM_LOCK_ASSERT_OWNER(pVM);
3991 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3992 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3993
3994 /*
3995 * Special common cases.
3996 */
3997 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3998 {
3999 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4000 paPhysExts[iPhysExt].apte[1] = iPte;
4001 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4002 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4003 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4004 }
4005 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4006 {
4007 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4008 paPhysExts[iPhysExt].apte[2] = iPte;
4009 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4010 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4011 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4012 }
4013 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4014
4015 /*
4016 * General treatment.
4017 */
4018 const uint16_t iPhysExtStart = iPhysExt;
4019 unsigned cMax = 15;
4020 for (;;)
4021 {
4022 Assert(iPhysExt < pPool->cMaxPhysExts);
4023 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4024 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4025 {
4026 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4027 paPhysExts[iPhysExt].apte[i] = iPte;
4028 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4029 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4030 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4031 }
4032 if (!--cMax)
4033 {
4034 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4035 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4036 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4037 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4038 }
4039
4040 /* advance */
4041 iPhysExt = paPhysExts[iPhysExt].iNext;
4042 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4043 break;
4044 }
4045
4046 /*
4047 * Add another extent to the list.
4048 */
4049 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4050 if (!pNew)
4051 {
4052 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4053 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4054 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4055 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4056 }
4057 pNew->iNext = iPhysExtStart;
4058 pNew->aidx[0] = iShwPT;
4059 pNew->apte[0] = iPte;
4060 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4061 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4062}
4063
4064
4065/**
4066 * Add a reference to guest physical page where extents are in use.
4067 *
4068 * @returns The new tracking data for PGMPAGE.
4069 *
4070 * @param pVM The cross context VM structure.
4071 * @param pPhysPage Pointer to the aPages entry in the ram range.
4072 * @param u16 The ram range flags (top 16-bits).
4073 * @param iShwPT The shadow page table index.
4074 * @param iPte Page table entry
4075 */
4076uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4077{
4078 pgmLock(pVM);
4079 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4080 {
4081 /*
4082 * Convert to extent list.
4083 */
4084 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4085 uint16_t iPhysExt;
4086 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4087 if (pPhysExt)
4088 {
4089 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4090 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4091 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4092 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4093 pPhysExt->aidx[1] = iShwPT;
4094 pPhysExt->apte[1] = iPte;
4095 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4096 }
4097 else
4098 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4099 }
4100 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4101 {
4102 /*
4103 * Insert into the extent list.
4104 */
4105 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4106 }
4107 else
4108 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4109 pgmUnlock(pVM);
4110 return u16;
4111}
4112
4113
4114/**
4115 * Clear references to guest physical memory.
4116 *
4117 * @param pPool The pool.
4118 * @param pPage The page.
4119 * @param pPhysPage Pointer to the aPages entry in the ram range.
4120 * @param iPte Shadow PTE index
4121 */
4122void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4123{
4124 PVMCC pVM = pPool->CTX_SUFF(pVM);
4125 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4126 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4127
4128 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4129 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4130 {
4131 pgmLock(pVM);
4132
4133 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4134 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4135 do
4136 {
4137 Assert(iPhysExt < pPool->cMaxPhysExts);
4138
4139 /*
4140 * Look for the shadow page and check if it's all freed.
4141 */
4142 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4143 {
4144 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4145 && paPhysExts[iPhysExt].apte[i] == iPte)
4146 {
4147 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4148 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4149
4150 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4151 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4152 {
4153 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4154 pgmUnlock(pVM);
4155 return;
4156 }
4157
4158 /* we can free the node. */
4159 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4160 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4161 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4162 {
4163 /* lonely node */
4164 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4165 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4166 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4167 }
4168 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4169 {
4170 /* head */
4171 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4172 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4173 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4174 }
4175 else
4176 {
4177 /* in list */
4178 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4179 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4180 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4181 }
4182 iPhysExt = iPhysExtNext;
4183 pgmUnlock(pVM);
4184 return;
4185 }
4186 }
4187
4188 /* next */
4189 iPhysExtPrev = iPhysExt;
4190 iPhysExt = paPhysExts[iPhysExt].iNext;
4191 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4192
4193 pgmUnlock(pVM);
4194 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4195 }
4196 else /* nothing to do */
4197 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4198}
4199
4200/**
4201 * Clear references to guest physical memory.
4202 *
4203 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4204 * physical address is assumed to be correct, so the linear search can be
4205 * skipped and we can assert at an earlier point.
4206 *
4207 * @param pPool The pool.
4208 * @param pPage The page.
4209 * @param HCPhys The host physical address corresponding to the guest page.
4210 * @param GCPhys The guest physical address corresponding to HCPhys.
4211 * @param iPte Shadow PTE index
4212 */
4213static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4214{
4215 /*
4216 * Lookup the page and check if it checks out before derefing it.
4217 */
4218 PVMCC pVM = pPool->CTX_SUFF(pVM);
4219 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4220 if (pPhysPage)
4221 {
4222 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4223#ifdef LOG_ENABLED
4224 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4225 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4226#endif
4227 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4228 {
4229 Assert(pPage->cPresent);
4230 Assert(pPool->cPresent);
4231 pPage->cPresent--;
4232 pPool->cPresent--;
4233 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4234 return;
4235 }
4236
4237 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4238 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4239 }
4240 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4241}
4242
4243
4244/**
4245 * Clear references to guest physical memory.
4246 *
4247 * @param pPool The pool.
4248 * @param pPage The page.
4249 * @param HCPhys The host physical address corresponding to the guest page.
4250 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
4251 * @param iPte Shadow pte index
4252 */
4253void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4254{
4255 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4256
4257 /*
4258 * Try the hint first.
4259 */
4260 RTHCPHYS HCPhysHinted;
4261 PVMCC pVM = pPool->CTX_SUFF(pVM);
4262 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4263 if (pPhysPage)
4264 {
4265 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4266 Assert(HCPhysHinted);
4267 if (HCPhysHinted == HCPhys)
4268 {
4269 Assert(pPage->cPresent);
4270 Assert(pPool->cPresent);
4271 pPage->cPresent--;
4272 pPool->cPresent--;
4273 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4274 return;
4275 }
4276 }
4277 else
4278 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4279
4280 /*
4281 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4282 */
4283 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4284 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4285 while (pRam)
4286 {
4287 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4288 while (iPage-- > 0)
4289 {
4290 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4291 {
4292 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4293 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4294 Assert(pPage->cPresent);
4295 Assert(pPool->cPresent);
4296 pPage->cPresent--;
4297 pPool->cPresent--;
4298 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4299 return;
4300 }
4301 }
4302 pRam = pRam->CTX_SUFF(pNext);
4303 }
4304
4305 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4306}
4307
4308
4309/**
4310 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4311 *
4312 * @param pPool The pool.
4313 * @param pPage The page.
4314 * @param pShwPT The shadow page table (mapping of the page).
4315 * @param pGstPT The guest page table.
4316 */
4317DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4318{
4319 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4320 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4321 {
4322 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4323 if (pShwPT->a[i].n.u1Present)
4324 {
4325 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4326 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4327 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4328 if (!pPage->cPresent)
4329 break;
4330 }
4331 }
4332}
4333
4334
4335/**
4336 * Clear references to guest physical memory in a PAE / 32-bit page table.
4337 *
4338 * @param pPool The pool.
4339 * @param pPage The page.
4340 * @param pShwPT The shadow page table (mapping of the page).
4341 * @param pGstPT The guest page table (just a half one).
4342 */
4343DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4344{
4345 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4346 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4347 {
4348 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4349 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4350 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4351 {
4352 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4353 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4354 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4355 if (!pPage->cPresent)
4356 break;
4357 }
4358 }
4359}
4360
4361
4362/**
4363 * Clear references to guest physical memory in a PAE / PAE page table.
4364 *
4365 * @param pPool The pool.
4366 * @param pPage The page.
4367 * @param pShwPT The shadow page table (mapping of the page).
4368 * @param pGstPT The guest page table.
4369 */
4370DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4371{
4372 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4373 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4374 {
4375 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4376 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4377 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4378 {
4379 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4380 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4381 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4382 if (!pPage->cPresent)
4383 break;
4384 }
4385 }
4386}
4387
4388
4389/**
4390 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4391 *
4392 * @param pPool The pool.
4393 * @param pPage The page.
4394 * @param pShwPT The shadow page table (mapping of the page).
4395 */
4396DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4397{
4398 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4399 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4400 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4401 {
4402 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4403 if (pShwPT->a[i].n.u1Present)
4404 {
4405 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4406 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4407 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4408 if (!pPage->cPresent)
4409 break;
4410 }
4411 }
4412}
4413
4414
4415/**
4416 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4417 *
4418 * @param pPool The pool.
4419 * @param pPage The page.
4420 * @param pShwPT The shadow page table (mapping of the page).
4421 */
4422DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4423{
4424 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4425 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4426 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4427 {
4428 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4429 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4430 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4431 {
4432 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4433 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4434 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4435 if (!pPage->cPresent)
4436 break;
4437 }
4438 }
4439}
4440
4441
4442/**
4443 * Clear references to shadowed pages in an EPT page table.
4444 *
4445 * @param pPool The pool.
4446 * @param pPage The page.
4447 * @param pShwPT The shadow page directory pointer table (mapping of the
4448 * page).
4449 */
4450DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4451{
4452 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4453 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4454 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4455 {
4456 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4457 if (pShwPT->a[i].n.u1Present)
4458 {
4459 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4460 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4461 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4462 if (!pPage->cPresent)
4463 break;
4464 }
4465 }
4466}
4467
4468
4469/**
4470 * Clear references to shadowed pages in a 32 bits page directory.
4471 *
4472 * @param pPool The pool.
4473 * @param pPage The page.
4474 * @param pShwPD The shadow page directory (mapping of the page).
4475 */
4476DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4477{
4478 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4479 {
4480 if ( pShwPD->a[i].n.u1Present
4481 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4482 )
4483 {
4484 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4485 if (pSubPage)
4486 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4487 else
4488 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4489 }
4490 }
4491}
4492
4493
4494/**
4495 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4496 *
4497 * @param pPool The pool.
4498 * @param pPage The page.
4499 * @param pShwPD The shadow page directory (mapping of the page).
4500 */
4501DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4502{
4503 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4504 {
4505 if ( pShwPD->a[i].n.u1Present
4506 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4507 {
4508#ifdef PGM_WITH_LARGE_PAGES
4509 if (pShwPD->a[i].b.u1Size)
4510 {
4511 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4512 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4513 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4514 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4515 i);
4516 }
4517 else
4518#endif
4519 {
4520 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4521 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4522 if (pSubPage)
4523 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4524 else
4525 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4526 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4527 }
4528 }
4529 }
4530}
4531
4532
4533/**
4534 * Clear references to shadowed pages in a PAE page directory pointer table.
4535 *
4536 * @param pPool The pool.
4537 * @param pPage The page.
4538 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4539 */
4540DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4541{
4542 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4543 {
4544 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4545 if ( pShwPDPT->a[i].n.u1Present
4546 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4547 )
4548 {
4549 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4550 if (pSubPage)
4551 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4552 else
4553 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4554 }
4555 }
4556}
4557
4558
4559/**
4560 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4561 *
4562 * @param pPool The pool.
4563 * @param pPage The page.
4564 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4565 */
4566DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4567{
4568 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4569 {
4570 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4571 if (pShwPDPT->a[i].n.u1Present)
4572 {
4573 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4574 if (pSubPage)
4575 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4576 else
4577 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4578 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4579 }
4580 }
4581}
4582
4583
4584/**
4585 * Clear references to shadowed pages in a 64-bit level 4 page table.
4586 *
4587 * @param pPool The pool.
4588 * @param pPage The page.
4589 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4590 */
4591DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4592{
4593 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4594 {
4595 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4596 if (pShwPML4->a[i].n.u1Present)
4597 {
4598 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4599 if (pSubPage)
4600 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4601 else
4602 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4603 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4604 }
4605 }
4606}
4607
4608
4609/**
4610 * Clear references to shadowed pages in an EPT page directory.
4611 *
4612 * @param pPool The pool.
4613 * @param pPage The page.
4614 * @param pShwPD The shadow page directory (mapping of the page).
4615 */
4616DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4617{
4618 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4619 {
4620 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4621 if (pShwPD->a[i].n.u1Present)
4622 {
4623#ifdef PGM_WITH_LARGE_PAGES
4624 if (pShwPD->a[i].b.u1Size)
4625 {
4626 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4627 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4628 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4629 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4630 i);
4631 }
4632 else
4633#endif
4634 {
4635 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4636 if (pSubPage)
4637 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4638 else
4639 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4640 }
4641 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4642 }
4643 }
4644}
4645
4646
4647/**
4648 * Clear references to shadowed pages in an EPT page directory pointer table.
4649 *
4650 * @param pPool The pool.
4651 * @param pPage The page.
4652 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4653 */
4654DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4655{
4656 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4657 {
4658 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4659 if (pShwPDPT->a[i].n.u1Present)
4660 {
4661 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4662 if (pSubPage)
4663 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4664 else
4665 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4666 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4667 }
4668 }
4669}
4670
4671
/**
 * Clears all references made by this page.
 *
 * This includes other shadow pages and GC physical addresses.
 *
 * The shadow page is mapped, the kind-specific dereference worker is invoked
 * according to pPage->enmKind, and finally the shadow page is zeroed and
 * marked as such (pPage->fZeroed).  An unknown page kind is a fatal error.
 *
 * @param   pPool       The pool.
 * @param   pPage       The page.
 */
static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    /*
     * Map the shadow page and take action according to the page kind.
     */
    PVMCC pVM = pPool->CTX_SUFF(pVM);
    void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
    switch (pPage->enmKind)
    {
        /*
         * Page tables shadowing a guest page table: the guest table at
         * pPage->GCPhys is mapped as well and handed to the worker together
         * with the shadow table.
         */
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        {
            STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
            void *pvGst;
            int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
            pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
            PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
            STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
            break;
        }

        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        {
            STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
            void *pvGst;
            /* NOTE(review): this is the only case using the _EX mapping variant;
               presumably because GCPhys may carry a sub-page offset here - confirm. */
            int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
            pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
            PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
            STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
            break;
        }

        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        {
            STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
            void *pvGst;
            int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
            pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
            PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
            STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
            break;
        }

        /*
         * Page tables without a guest page table behind them (big pages and
         * physical mode): only the shadow table is needed.
         */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        {
            STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
            pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
            STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
            break;
        }

        case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        {
            STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
            pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
            STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
            break;
        }

        /*
         * Higher level tables: release the user records on the sub-pages.
         */
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
            pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
            break;

        case PGMPOOLKIND_32BIT_PD_PHYS:
        case PGMPOOLKIND_32BIT_PD:
            pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
            break;

        case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
        case PGMPOOLKIND_PAE_PDPT:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
            pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
            break;

        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
            pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
            break;

        case PGMPOOLKIND_64BIT_PML4:
            pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
            break;

        /*
         * EPT tables.
         */
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
            pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
            break;

        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
            pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
            break;

        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
            pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
            break;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
    }

    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
    STAM_PROFILE_START(&pPool->StatZeroPage, z);
    ASMMemZeroPage(pvShw);
    STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
    pPage->fZeroed = true;  /* Lets pgmPoolAlloc skip zeroing when the page is reused. */
    Assert(!pPage->cPresent);
    PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
}
4796
4797
/**
 * Flushes a pool page.
 *
 * This moves the page to the free list after removing all user references to it.
 *
 * Requests for special root pages (idx below PGMPOOL_IDX_FIRST) and for pages
 * that are currently locked are rejected; VINF_SUCCESS is returned in both
 * cases without touching the page.  The PGM lock is taken for the duration of
 * the actual flush.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success (also when the request was rejected).
 *          Otherwise the status of pgmPoolMonitorFlush() if the page was
 *          monitored.
 * @param   pPool       The pool.
 * @param   pPage       The shadow page.
 * @param   fFlush      Flush the TLBS when required (should only be false in very specific use cases!!)
 */
int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
{
    PVMCC pVM = pPool->CTX_SUFF(pVM);
    bool fFlushRequired = false;

    int rc = VINF_SUCCESS;
    STAM_PROFILE_START(&pPool->StatFlushPage, f);
    LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
             pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));

    /*
     * Reject any attempts at flushing any of the special root pages (shall
     * not happen).
     */
    AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
                    ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
                     pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
                    VINF_SUCCESS);

    pgmLock(pVM);

    /*
     * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
     */
    if (pgmPoolIsPageLocked(pPage))
    {
        /* Only root-level kinds are expected to be locked. */
        AssertMsg(   pPage->enmKind == PGMPOOLKIND_64BIT_PML4
                  || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
                  || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
                  || pPage->enmKind == PGMPOOLKIND_32BIT_PD
                  || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
                  || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
                  || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
                  || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
                  || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
                  || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
                  ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
        Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
        pgmUnlock(pVM);
        return VINF_SUCCESS;
    }

#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
    /* Start a subset so we won't run out of mapping space. */
    PVMCPU pVCpu = VMMGetCpu(pVM);
    uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
#endif

    /*
     * Mark the page as being in need of an ASMMemZeroPage().
     */
    pPage->fZeroed = false;

#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
    if (pPage->fDirty)
        pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
#endif

    /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
    /* This must be sampled before the user list is cleared below. */
    if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
        fFlushRequired = true;

    /*
     * Clear the page.
     */
    pgmPoolTrackClearPageUsers(pPool, pPage);
    STAM_PROFILE_START(&pPool->StatTrackDeref,a);
    pgmPoolTrackDeref(pPool, pPage);
    STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);

    /*
     * Flush it from the cache.
     */
    pgmPoolCacheFlushPage(pPool, pPage);

#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
    /* Heavy stuff done. */
    PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
#endif

    /*
     * Deregistering the monitoring.
     */
    if (pPage->fMonitored)
        rc = pgmPoolMonitorFlush(pPool, pPage);

    /*
     * Free the page: push it onto the head of the free list and reset the
     * identifying fields.
     */
    Assert(pPage->iNext == NIL_PGMPOOL_IDX);
    pPage->iNext = pPool->iFreeHead;
    pPool->iFreeHead = pPage->idx;
    pPage->enmKind = PGMPOOLKIND_FREE;
    pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
    pPage->GCPhys = NIL_RTGCPHYS;
    pPage->fReusedFlushPending = false;

    pPool->cUsedPages--;

    /* Flush the TLBs of all VCPUs if required. */
    if (   fFlushRequired
        && fFlush)
    {
        PGM_INVL_ALL_VCPU_TLBS(pVM);
    }

    pgmUnlock(pVM);
    STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
    return rc;
}
4919
4920
4921/**
4922 * Frees a usage of a pool page.
4923 *
4924 * The caller is responsible to updating the user table so that it no longer
4925 * references the shadow page.
4926 *
4927 * @param pPool The pool.
4928 * @param pPage The shadow page.
4929 * @param iUser The shadow page pool index of the user table.
4930 * NIL_PGMPOOL_IDX for root pages.
4931 * @param iUserTable The index into the user table (shadowed). Ignored if
4932 * root page.
4933 */
4934void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4935{
4936 PVMCC pVM = pPool->CTX_SUFF(pVM);
4937
4938 STAM_PROFILE_START(&pPool->StatFree, a);
4939 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4940 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4941 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
4942
4943 pgmLock(pVM);
4944 if (iUser != NIL_PGMPOOL_IDX)
4945 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4946 if (!pPage->fCached)
4947 pgmPoolFlushPage(pPool, pPage);
4948 pgmUnlock(pVM);
4949 STAM_PROFILE_STOP(&pPool->StatFree, a);
4950}
4951
4952
4953/**
4954 * Makes one or more free page free.
4955 *
4956 * @returns VBox status code.
4957 * @retval VINF_SUCCESS on success.
4958 *
4959 * @param pPool The pool.
4960 * @param enmKind Page table kind
4961 * @param iUser The user of the page.
4962 */
4963static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4964{
4965 PVMCC pVM = pPool->CTX_SUFF(pVM);
4966 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
4967 NOREF(enmKind);
4968
4969 /*
4970 * If the pool isn't full grown yet, expand it.
4971 */
4972 if (pPool->cCurPages < pPool->cMaxPages)
4973 {
4974 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4975#ifdef IN_RING3
4976 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
4977#else
4978 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4979#endif
4980 if (RT_FAILURE(rc))
4981 return rc;
4982 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4983 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4984 return VINF_SUCCESS;
4985 }
4986
4987 /*
4988 * Free one cached page.
4989 */
4990 return pgmPoolCacheFreeOne(pPool, iUser);
4991}
4992
4993
4994/**
4995 * Allocates a page from the pool.
4996 *
4997 * This page may actually be a cached page and not in need of any processing
4998 * on the callers part.
4999 *
5000 * @returns VBox status code.
5001 * @retval VINF_SUCCESS if a NEW page was allocated.
5002 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5003 *
5004 * @param pVM The cross context VM structure.
5005 * @param GCPhys The GC physical address of the page we're gonna shadow.
5006 * For 4MB and 2MB PD entries, it's the first address the
5007 * shadow PT is covering.
5008 * @param enmKind The kind of mapping.
5009 * @param enmAccess Access type for the mapping (only relevant for big pages)
5010 * @param fA20Enabled Whether the A20 gate is enabled or not.
5011 * @param iUser The shadow page pool index of the user table. Root
5012 * pages should pass NIL_PGMPOOL_IDX.
5013 * @param iUserTable The index into the user table (shadowed). Ignored for
5014 * root pages (iUser == NIL_PGMPOOL_IDX).
5015 * @param fLockPage Lock the page
5016 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5017 */
5018int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5019 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5020{
5021 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5022 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5023 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5024 *ppPage = NULL;
5025 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5026 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5027 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5028
5029 pgmLock(pVM);
5030
5031 if (pPool->fCacheEnabled)
5032 {
5033 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5034 if (RT_SUCCESS(rc2))
5035 {
5036 if (fLockPage)
5037 pgmPoolLockPage(pPool, *ppPage);
5038 pgmUnlock(pVM);
5039 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5040 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5041 return rc2;
5042 }
5043 }
5044
5045 /*
5046 * Allocate a new one.
5047 */
5048 int rc = VINF_SUCCESS;
5049 uint16_t iNew = pPool->iFreeHead;
5050 if (iNew == NIL_PGMPOOL_IDX)
5051 {
5052 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5053 if (RT_FAILURE(rc))
5054 {
5055 pgmUnlock(pVM);
5056 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5057 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5058 return rc;
5059 }
5060 iNew = pPool->iFreeHead;
5061 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5062 }
5063
5064 /* unlink the free head */
5065 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5066 pPool->iFreeHead = pPage->iNext;
5067 pPage->iNext = NIL_PGMPOOL_IDX;
5068
5069 /*
5070 * Initialize it.
5071 */
5072 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5073 pPage->enmKind = enmKind;
5074 pPage->enmAccess = enmAccess;
5075 pPage->GCPhys = GCPhys;
5076 pPage->fA20Enabled = fA20Enabled;
5077 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5078 pPage->fMonitored = false;
5079 pPage->fCached = false;
5080 pPage->fDirty = false;
5081 pPage->fReusedFlushPending = false;
5082 pPage->cModifications = 0;
5083 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5084 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5085 pPage->cPresent = 0;
5086 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5087 pPage->idxDirtyEntry = 0;
5088 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5089 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5090 pPage->cLastAccessHandler = 0;
5091 pPage->cLocked = 0;
5092# ifdef VBOX_STRICT
5093 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5094# endif
5095
5096 /*
5097 * Insert into the tracking and cache. If this fails, free the page.
5098 */
5099 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5100 if (RT_FAILURE(rc3))
5101 {
5102 pPool->cUsedPages--;
5103 pPage->enmKind = PGMPOOLKIND_FREE;
5104 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5105 pPage->GCPhys = NIL_RTGCPHYS;
5106 pPage->iNext = pPool->iFreeHead;
5107 pPool->iFreeHead = pPage->idx;
5108 pgmUnlock(pVM);
5109 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5110 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5111 return rc3;
5112 }
5113
5114 /*
5115 * Commit the allocation, clear the page and return.
5116 */
5117#ifdef VBOX_WITH_STATISTICS
5118 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5119 pPool->cUsedPagesHigh = pPool->cUsedPages;
5120#endif
5121
5122 if (!pPage->fZeroed)
5123 {
5124 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5125 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5126 ASMMemZeroPage(pv);
5127 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5128 }
5129
5130 *ppPage = pPage;
5131 if (fLockPage)
5132 pgmPoolLockPage(pPool, pPage);
5133 pgmUnlock(pVM);
5134 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5135 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5136 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5137 return rc;
5138}
5139
5140
5141/**
5142 * Frees a usage of a pool page.
5143 *
5144 * @param pVM The cross context VM structure.
5145 * @param HCPhys The HC physical address of the shadow page.
5146 * @param iUser The shadow page pool index of the user table.
5147 * NIL_PGMPOOL_IDX if root page.
5148 * @param iUserTable The index into the user table (shadowed). Ignored if
5149 * root page.
5150 */
5151void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5152{
5153 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5154 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5155 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5156}
5157
5158
5159/**
5160 * Internal worker for finding a 'in-use' shadow page give by it's physical address.
5161 *
5162 * @returns Pointer to the shadow page structure.
5163 * @param pPool The pool.
5164 * @param HCPhys The HC physical address of the shadow page.
5165 */
5166PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5167{
5168 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5169
5170 /*
5171 * Look up the page.
5172 */
5173 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5174
5175 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5176 return pPage;
5177}
5178
5179
5180/**
5181 * Internal worker for finding a page for debugging purposes, no assertions.
5182 *
5183 * @returns Pointer to the shadow page structure. NULL on if not found.
5184 * @param pPool The pool.
5185 * @param HCPhys The HC physical address of the shadow page.
5186 */
5187PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5188{
5189 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5190 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5191}
5192
5193
5194/**
5195 * Internal worker for PGM_HCPHYS_2_PTR.
5196 *
5197 * @returns VBox status code.
5198 * @param pVM The cross context VM structure.
5199 * @param HCPhys The HC physical address of the shadow page.
5200 * @param ppv Where to return the address.
5201 */
5202int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5203{
5204 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5205 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5206 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5207 VERR_PGM_POOL_GET_PAGE_FAILED);
5208 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5209 return VINF_SUCCESS;
5210}
5211
5212#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5213
5214/**
5215 * Flush the specified page if present
5216 *
5217 * @param pVM The cross context VM structure.
5218 * @param GCPhys Guest physical address of the page to flush
5219 */
5220void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5221{
5222 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5223
5224 VM_ASSERT_EMT(pVM);
5225
5226 /*
5227 * Look up the GCPhys in the hash.
5228 */
5229 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5230 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5231 if (i == NIL_PGMPOOL_IDX)
5232 return;
5233
5234 do
5235 {
5236 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5237 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5238 {
5239 switch (pPage->enmKind)
5240 {
5241 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5242 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5243 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5244 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5245 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5246 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5247 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5248 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5249 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5250 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5251 case PGMPOOLKIND_64BIT_PML4:
5252 case PGMPOOLKIND_32BIT_PD:
5253 case PGMPOOLKIND_PAE_PDPT:
5254 {
5255 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5256# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5257 if (pPage->fDirty)
5258 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5259 else
5260# endif
5261 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5262 Assert(!pgmPoolIsPageLocked(pPage));
5263 pgmPoolMonitorChainFlush(pPool, pPage);
5264 return;
5265 }
5266
5267 /* ignore, no monitoring. */
5268 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5269 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5270 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5271 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5272 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5273 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5274 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5275 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5276 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5277 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5278 case PGMPOOLKIND_ROOT_NESTED:
5279 case PGMPOOLKIND_PAE_PD_PHYS:
5280 case PGMPOOLKIND_PAE_PDPT_PHYS:
5281 case PGMPOOLKIND_32BIT_PD_PHYS:
5282 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5283 break;
5284
5285 default:
5286 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5287 }
5288 }
5289
5290 /* next */
5291 i = pPage->iNext;
5292 } while (i != NIL_PGMPOOL_IDX);
5293 return;
5294}
5295
5296
/**
 * Reset CPU on hot plugging.
 *
 * Takes the virtual CPU out of shadow mode and re-enters it again, then
 * raises the CR3 sync and TLB flush force-action flags for that CPU.
 *
 * @param   pVM     The cross context VM structure.
 * @param   pVCpu   The cross context virtual CPU structure.
 */
void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
{
    pgmR3ExitShadowModeBeforePoolFlush(pVCpu);

    pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
    /* Same force-action flags as pgmR3PoolReset raises per CPU. */
    VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
    VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
}
5311
5312
/**
 * Flushes the entire cache.
 *
 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
 * this and execute this CR3 flush.
 *
 * The caller must own the PGM lock.  All pool pages from PGMPOOL_IDX_FIRST
 * upwards are put back on the free list, and the user records, physical
 * extent records, per-page tracking data, modified list, GCPhys hash, age
 * list and (if enabled) dirty page list are reinitialized.  Every virtual CPU
 * leaves shadow mode before and re-enters it after the reset.
 *
 * @param   pVM     The cross context VM structure.
 */
void pgmR3PoolReset(PVM pVM)
{
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);

    PGM_LOCK_ASSERT_OWNER(pVM);
    STAM_PROFILE_START(&pPool->StatR3Reset, a);
    LogFlow(("pgmR3PoolReset:\n"));

    /*
     * If there are no pages in the pool, there is nothing to do.
     * (This also guarantees the descending loop below starts at a valid index.)
     */
    if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
    {
        STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
        return;
    }

    /*
     * Exit the shadow mode since we're going to clear everything,
     * including the root page.
     */
    VMCC_FOR_EACH_VMCPU(pVM)
        pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
    VMCC_FOR_EACH_VMCPU_END(pVM);


    /*
     * Nuke the free list and reinsert all pages into it.
     * Each page is chained to its successor (i + 1); the last page is
     * terminated after the loop.
     */
    for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];

        if (pPage->fMonitored)
            pgmPoolMonitorFlush(pPool, pPage);
        pPage->iModifiedNext = NIL_PGMPOOL_IDX;
        pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
        pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
        pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
        pPage->GCPhys = NIL_RTGCPHYS;
        pPage->enmKind = PGMPOOLKIND_FREE;
        pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
        Assert(pPage->idx == i);
        pPage->iNext = i + 1;
        pPage->fA20Enabled = true;
        pPage->fZeroed = false;     /* This could probably be optimized, but better safe than sorry. */
        pPage->fSeenNonGlobal = false;
        pPage->fMonitored = false;
        pPage->fDirty = false;
        pPage->fCached = false;
        pPage->fReusedFlushPending = false;
        pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
        pPage->cPresent = 0;
        pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
        pPage->cModifications = 0;
        pPage->iAgeNext = NIL_PGMPOOL_IDX;
        pPage->iAgePrev = NIL_PGMPOOL_IDX;
        pPage->idxDirtyEntry = 0;
        pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
        pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
        pPage->cLastAccessHandler = 0;
        pPage->cLocked = 0;
# ifdef VBOX_STRICT
        pPage->GCPtrDirtyFault = NIL_RTGCPTR;
# endif
    }
    /* Terminate the chain and point the free list at its first page. */
    pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
    pPool->iFreeHead = PGMPOOL_IDX_FIRST;
    pPool->cUsedPages = 0;

    /*
     * Zap and reinitialize the user records.
     * All records are chained into one free list starting at index 0.
     */
    pPool->cPresent = 0;
    pPool->iUserFreeHead = 0;
    PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
    const unsigned cMaxUsers = pPool->cMaxUsers;
    for (unsigned i = 0; i < cMaxUsers; i++)
    {
        paUsers[i].iNext = i + 1;
        paUsers[i].iUser = NIL_PGMPOOL_IDX;
        paUsers[i].iUserTable = 0xfffffffe; /* presumably an 'unused' marker value - TODO confirm */
    }
    paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;

    /*
     * Clear all the GCPhys links and rebuild the phys ext free list.
     */
    for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
         pRam;
         pRam = pRam->CTX_SUFF(pNext))
    {
        /* Reset the tracking word of every page in the range. */
        unsigned iPage = pRam->cb >> PAGE_SHIFT;
        while (iPage-- > 0)
            PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
    }

    pPool->iPhysExtFreeHead = 0;
    PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
    const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
    for (unsigned i = 0; i < cMaxPhysExts; i++)
    {
        paPhysExts[i].iNext = i + 1;
        paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
        paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
        paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
        paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
        paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
        paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
    }
    paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;

    /*
     * Just zap the modified list.
     */
    pPool->cModifiedPages = 0;
    pPool->iModifiedHead = NIL_PGMPOOL_IDX;

    /*
     * Clear the GCPhys hash and the age list.
     */
    for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
        pPool->aiHash[i] = NIL_PGMPOOL_IDX;
    pPool->iAgeHead = NIL_PGMPOOL_IDX;
    pPool->iAgeTail = NIL_PGMPOOL_IDX;

# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
    /* Clear all dirty pages. */
    pPool->idxFreeDirtyPage = 0;
    pPool->cDirtyPages = 0;
    for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
        pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
# endif

    /*
     * Bring every virtual CPU back into shadow mode on top of the now empty
     * pool.
     */
    VMCC_FOR_EACH_VMCPU(pVM)
    {
        /*
         * Re-enter the shadowing mode and assert Sync CR3 FF.
         */
        pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
        VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
        VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
    }
    VMCC_FOR_EACH_VMCPU_END(pVM);

    STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
}
5471
5472#endif /* IN_RING3 */
5473
5474#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5475/**
5476 * Stringifies a PGMPOOLKIND value.
5477 */
5478static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5479{
5480 switch ((PGMPOOLKIND)enmKind)
5481 {
5482 case PGMPOOLKIND_INVALID:
5483 return "PGMPOOLKIND_INVALID";
5484 case PGMPOOLKIND_FREE:
5485 return "PGMPOOLKIND_FREE";
5486 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5487 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5488 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5489 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5490 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5491 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5492 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5493 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5494 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5495 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5496 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5497 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5498 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5499 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5500 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5501 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5502 case PGMPOOLKIND_32BIT_PD:
5503 return "PGMPOOLKIND_32BIT_PD";
5504 case PGMPOOLKIND_32BIT_PD_PHYS:
5505 return "PGMPOOLKIND_32BIT_PD_PHYS";
5506 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5507 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5508 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5509 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5510 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5511 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5512 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5513 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5514 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5515 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5516 case PGMPOOLKIND_PAE_PD_PHYS:
5517 return "PGMPOOLKIND_PAE_PD_PHYS";
5518 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5519 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5520 case PGMPOOLKIND_PAE_PDPT:
5521 return "PGMPOOLKIND_PAE_PDPT";
5522 case PGMPOOLKIND_PAE_PDPT_PHYS:
5523 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5524 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5525 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5526 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5527 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5528 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5529 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5530 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5531 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5532 case PGMPOOLKIND_64BIT_PML4:
5533 return "PGMPOOLKIND_64BIT_PML4";
5534 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5535 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5536 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5537 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5538 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5539 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5540 case PGMPOOLKIND_ROOT_NESTED:
5541 return "PGMPOOLKIND_ROOT_NESTED";
5542 }
5543 return "Unknown kind!";
5544}
5545#endif /* LOG_ENABLED || VBOX_STRICT */
5546
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette