VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 85416

Last change on this file since 85416 was 82968, checked in by vboxsync, 5 years ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 210.5 KB
1/* $Id: PGMAllPool.cpp 82968 2020-02-04 10:35:17Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#include "PGMInternal.h"
28#include <VBox/vmm/vmcc.h>
29#include "PGMInline.h"
30#include <VBox/disopcode.h>
31#include <VBox/vmm/hm_vmx.h>
32
33#include <VBox/log.h>
34#include <VBox/err.h>
35#include <iprt/asm.h>
36#include <iprt/asm-amd64-x86.h>
37#include <iprt/string.h>
38
39
40/*********************************************************************************************************************************
41* Internal Functions *
42*********************************************************************************************************************************/
43RT_C_DECLS_BEGIN
44#if 0 /* unused */
45DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
46DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
47#endif /* unused */
48static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
49static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
51static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
53static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
54#endif
55#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
56static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
57#endif
58
59int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
60PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
61void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
62void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
63
64RT_C_DECLS_END
65
66
67#if 0 /* unused */
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86#endif /* unused */
87
88
89/**
90 * Flushes a chain of pages sharing the same access monitor.
91 *
92 * @param pPool The pool.
93 * @param pPage A page in the chain.
94 */
95void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
96{
97 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
98
99 /*
100 * Find the list head.
101 */
102 uint16_t idx = pPage->idx;
103 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
104 {
105 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
106 {
107 idx = pPage->iMonitoredPrev;
108 Assert(idx != pPage->idx);
109 pPage = &pPool->aPages[idx];
110 }
111 }
112
113 /*
114 * Iterate the list flushing each shadow page.
115 */
116 for (;;)
117 {
118 idx = pPage->iMonitoredNext;
119 Assert(idx != pPage->idx);
120 if (pPage->idx >= PGMPOOL_IDX_FIRST)
121 {
122 int rc2 = pgmPoolFlushPage(pPool, pPage);
123 AssertRC(rc2);
124 }
125 /* next */
126 if (idx == NIL_PGMPOOL_IDX)
127 break;
128 pPage = &pPool->aPages[idx];
129 }
130}
131
132
133/**
134 * Wrapper for getting the current context pointer to the entry being modified.
135 *
136 * @returns VBox status code suitable for scheduling.
137 * @param pVM The cross context VM structure.
138 * @param pvDst Destination address
139 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
140 * on the context (e.g. \#PF in R0 & RC).
141 * @param GCPhysSrc The source guest physical address.
142 * @param cb Size of data to read
143 */
144DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
145{
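146 /* Note: both branches round the source down to a cb-aligned boundary, so the
147    whole 4/8-byte entry is read even when the write faulted in the middle of it. */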
146#if defined(IN_RING3)
147 NOREF(pVM); NOREF(GCPhysSrc);
148 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
149 return VINF_SUCCESS;
150#else
151 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
152 NOREF(pvSrc);
153 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
154#endif
155}
156
157
158/**
159 * Process shadow entries before they are changed by the guest.
160 *
161 * For PT entries we will clear them. For PD entries, we'll simply check
162 * for mapping conflicts and set the SyncCR3 FF if found.
163 *
164 * @param pVCpu The cross context virtual CPU structure.
165 * @param pPool The pool.
166 * @param pPage The head page.
167 * @param GCPhysFault The guest physical fault address.
168 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
169 * depending on the context (e.g. \#PF in R0 & RC).
170 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
171 */
172static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
173 void const *pvAddress, unsigned cbWrite)
174{
175 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
176 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
177 PVMCC pVM = pPool->CTX_SUFF(pVM);
178 NOREF(pVCpu);
179
180 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
181 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
182
183 for (;;)
184 {
185 union
186 {
187 void *pv;
188 PX86PT pPT;
189 PPGMSHWPTPAE pPTPae;
190 PX86PD pPD;
191 PX86PDPAE pPDPae;
192 PX86PDPT pPDPT;
193 PX86PML4 pPML4;
194 } uShw;
195
196 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
197 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
198
199 uShw.pv = NULL;
200 switch (pPage->enmKind)
201 {
202 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
203 {
204 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
205 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
206 const unsigned iShw = off / sizeof(X86PTE);
207 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
208 if (uShw.pPT->a[iShw].n.u1Present)
209 {
210 X86PTE GstPte;
211
212 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
213 AssertRC(rc);
214 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
215 pgmPoolTracDerefGCPhysHint(pPool, pPage,
216 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
217 GstPte.u & X86_PTE_PG_MASK,
218 iShw);
219 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
220 }
221 break;
222 }
223
224 /* page/2 sized */
225 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
226 {
227 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
228 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
229 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
230 {
231 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
232 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
233 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
234 {
235 X86PTE GstPte;
236 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
237 AssertRC(rc);
238
239 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
240 pgmPoolTracDerefGCPhysHint(pPool, pPage,
241 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
242 GstPte.u & X86_PTE_PG_MASK,
243 iShw);
244 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
245 }
246 }
247 break;
248 }
249
250 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
251 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
252 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
253 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
254 {
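 /* A 32-bit guest PD has 1024 entries and is shadowed by four PAE PDs of 512
    entries each: 256 guest PDEs land in each shadow PD and every guest PDE
    expands into two PAE PDEs, hence the /256 and *2 below. */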
255 unsigned iGst = off / sizeof(X86PDE);
256 unsigned iShwPdpt = iGst / 256;
257 unsigned iShw = (iGst % 256) * 2;
258 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
259
260 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
261 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
262 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
263 {
264 for (unsigned i = 0; i < 2; i++)
265 {
266 if (uShw.pPDPae->a[iShw+i].n.u1Present)
267 {
268 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
269 pgmPoolFree(pVM,
270 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
271 pPage->idx,
272 iShw + i);
273 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
274 }
275
276 /* paranoia / a bit assumptive. */
277 if ( (off & 3)
278 && (off & 3) + cbWrite > 4)
279 {
280 const unsigned iShw2 = iShw + 2 + i;
281 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
282 {
283 if (uShw.pPDPae->a[iShw2].n.u1Present)
284 {
285 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
286 pgmPoolFree(pVM,
287 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
288 pPage->idx,
289 iShw2);
290 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
291 }
292 }
293 }
294 }
295 }
296 break;
297 }
298
299 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
300 {
301 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
302 const unsigned iShw = off / sizeof(X86PTEPAE);
303 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
304 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
305 {
306 X86PTEPAE GstPte;
307 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
308 AssertRC(rc);
309
310 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
311 pgmPoolTracDerefGCPhysHint(pPool, pPage,
312 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
313 GstPte.u & X86_PTE_PAE_PG_MASK,
314 iShw);
315 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
316 }
317
318 /* paranoia / a bit assumptive. */
319 if ( (off & 7)
320 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
321 {
322 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
323 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
324
325 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
326 {
327 X86PTEPAE GstPte;
328 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
329 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
330 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
331 AssertRC(rc);
332 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
333 pgmPoolTracDerefGCPhysHint(pPool, pPage,
334 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
335 GstPte.u & X86_PTE_PAE_PG_MASK,
336 iShw2);
337 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
338 }
339 }
340 break;
341 }
342
343 case PGMPOOLKIND_32BIT_PD:
344 {
345 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
346 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
347
348 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
349 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
350 if (uShw.pPD->a[iShw].n.u1Present)
351 {
352 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
353 pgmPoolFree(pVM,
354 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
355 pPage->idx,
356 iShw);
357 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
358 }
359 /* paranoia / a bit assumptive. */
360 if ( (off & 3)
361 && (off & 3) + cbWrite > sizeof(X86PTE))
362 {
363 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
364 if ( iShw2 != iShw
365 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
366 {
367 if (uShw.pPD->a[iShw2].n.u1Present)
368 {
369 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
370 pgmPoolFree(pVM,
371 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
372 pPage->idx,
373 iShw2);
374 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
375 }
376 }
377 }
378#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
379 if ( uShw.pPD->a[iShw].n.u1Present
380 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
381 {
382 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
383 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
384 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
385 }
386#endif
387 break;
388 }
389
390 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
391 {
392 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
393 const unsigned iShw = off / sizeof(X86PDEPAE);
394 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
395
396 /*
397 * Causes trouble when the guest uses a PDE to refer to the whole page table level
398 * structure. (Invalidate here; faults later on when it tries to change the page
399 * table entries -> recheck; probably only applies to the RC case.)
400 */
401 if (uShw.pPDPae->a[iShw].n.u1Present)
402 {
403 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
404 pgmPoolFree(pVM,
405 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
406 pPage->idx,
407 iShw);
408 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
409 }
410
411 /* paranoia / a bit assumptive. */
412 if ( (off & 7)
413 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
414 {
415 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
416 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
417
418 if (uShw.pPDPae->a[iShw2].n.u1Present)
419 {
420 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
421 pgmPoolFree(pVM,
422 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
423 pPage->idx,
424 iShw2);
425 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
426 }
427 }
428 break;
429 }
430
431 case PGMPOOLKIND_PAE_PDPT:
432 {
433 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
434 /*
435 * Hopefully this doesn't happen very often:
436 * - touching unused parts of the page
437 * - messing with the bits of pd pointers without changing the physical address
438 */
439 /* PDPT roots are not page aligned; 32 byte only! */
440 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
441
442 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
443 const unsigned iShw = offPdpt / sizeof(X86PDPE);
444 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
445 {
446 if (uShw.pPDPT->a[iShw].n.u1Present)
447 {
448 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
449 pgmPoolFree(pVM,
450 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
451 pPage->idx,
452 iShw);
453 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
454 }
455
456 /* paranoia / a bit assumptive. */
457 if ( (offPdpt & 7)
458 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
459 {
460 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
461 if ( iShw2 != iShw
462 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
463 {
464 if (uShw.pPDPT->a[iShw2].n.u1Present)
465 {
466 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
467 pgmPoolFree(pVM,
468 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
469 pPage->idx,
470 iShw2);
471 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
472 }
473 }
474 }
475 }
476 break;
477 }
478
479 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
480 {
481 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
482 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
483 const unsigned iShw = off / sizeof(X86PDEPAE);
484 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
485 if (uShw.pPDPae->a[iShw].n.u1Present)
486 {
487 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
488 pgmPoolFree(pVM,
489 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
490 pPage->idx,
491 iShw);
492 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
493 }
494 /* paranoia / a bit assumptive. */
495 if ( (off & 7)
496 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
497 {
498 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
499 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
500
501 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
502 if (uShw.pPDPae->a[iShw2].n.u1Present)
503 {
504 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
505 pgmPoolFree(pVM,
506 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
507 pPage->idx,
508 iShw2);
509 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
510 }
511 }
512 break;
513 }
514
515 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
516 {
517 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
518 /*
519 * Hopefully this doesn't happen very often:
520 * - messing with the bits of pd pointers without changing the physical address
521 */
522 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
523 const unsigned iShw = off / sizeof(X86PDPE);
524 if (uShw.pPDPT->a[iShw].n.u1Present)
525 {
526 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
527 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
528 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
529 }
530 /* paranoia / a bit assumptive. */
531 if ( (off & 7)
532 && (off & 7) + cbWrite > sizeof(X86PDPE))
533 {
534 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
535 if (uShw.pPDPT->a[iShw2].n.u1Present)
536 {
537 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
538 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
539 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
540 }
541 }
542 break;
543 }
544
545 case PGMPOOLKIND_64BIT_PML4:
546 {
547 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
548 /*
549 * Hopefully this doesn't happen very often:
550 * - messing with the bits of pd pointers without changing the physical address
551 */
552 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
553 const unsigned iShw = off / sizeof(X86PDPE);
554 if (uShw.pPML4->a[iShw].n.u1Present)
555 {
556 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
557 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
558 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
559 }
560 /* paranoia / a bit assumptive. */
561 if ( (off & 7)
562 && (off & 7) + cbWrite > sizeof(X86PDPE))
563 {
564 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
565 if (uShw.pPML4->a[iShw2].n.u1Present)
566 {
567 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
568 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
569 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
570 }
571 }
572 break;
573 }
574
575 default:
576 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
577 }
578 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
579
580 /* next */
581 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
582 return;
583 pPage = &pPool->aPages[pPage->iMonitoredNext];
584 }
585}
586
587#ifndef IN_RING3
588
589/**
590 * Checks if an access could be a fork operation in progress.
591 *
592 * Meaning that the guest is setting up the parent process for Copy-On-Write.
593 *
594 * @returns true if it's likely that we're forking, otherwise false.
595 * @param pPool The pool.
596 * @param pDis The disassembled instruction.
597 * @param offFault The access offset.
598 */
599DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
600{
601 /*
602 * i386 Linux uses btr to clear X86_PTE_RW.
603 * The functions involved are (2.6.16 source inspection):
604 * clear_bit
605 * ptep_set_wrprotect
606 * copy_one_pte
607 * copy_pte_range
608 * copy_pmd_range
609 * copy_pud_range
610 * copy_page_range
611 * dup_mmap
612 * dup_mm
613 * copy_mm
614 * copy_process
615 * do_fork
616 */
617 if ( pDis->pCurInstr->uOpcode == OP_BTR
618 && !(offFault & 4)
619 /** @todo Validate that the bit index is X86_PTE_RW. */
620 )
621 {
622 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
623 return true;
624 }
625 return false;
626}
627
628
629/**
630 * Determine whether the page is likely to have been reused.
631 *
632 * @returns true if we consider the page as being reused for a different purpose.
633 * @returns false if we consider it to still be a paging page.
634 * @param pVM The cross context VM structure.
635 * @param pVCpu The cross context virtual CPU structure.
636 * @param pRegFrame Trap register frame.
637 * @param pDis The disassembly info for the faulting instruction.
638 * @param pvFault The fault address.
639 * @param pPage The pool page being accessed.
640 *
641 * @remark The REP prefix check is left to the caller because of STOSD/W.
642 */
643DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
644 PPGMPOOLPAGE pPage)
645{
646 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
647 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
648 if (pPage->cLocked)
649 {
650 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused, because it's locked!\n", pvFault, pPage));
651 return false;
652 }
653
654 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
655 if ( HMHasPendingIrq(pVM)
656 && pRegFrame->rsp - pvFault < 32)
657 {
658 /* Fault caused by stack writes while trying to inject an interrupt event. */
659 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
660 return true;
661 }
662
663 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
664
665 /* Non-supervisor mode write means it's used for something else. */
666 if (CPUMGetGuestCPL(pVCpu) == 3)
667 return true;
668
669 switch (pDis->pCurInstr->uOpcode)
670 {
671 /* call implies the actual push of the return address faulted */
672 case OP_CALL:
673 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
674 return true;
675 case OP_PUSH:
676 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
677 return true;
678 case OP_PUSHF:
679 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
680 return true;
681 case OP_PUSHA:
682 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
683 return true;
684 case OP_FXSAVE:
685 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
686 return true;
687 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
688 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
689 return true;
690 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
691 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
692 return true;
693 case OP_MOVSWD:
694 case OP_STOSWD:
695 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
696 && pRegFrame->rcx >= 0x40
697 )
698 {
699 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
700
701 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
702 return true;
703 }
704 break;
705
706 default:
707 /*
708 * Anything having ESP on the left side means stack writes.
709 */
710 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
711 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
712 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
713 {
714 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
715 return true;
716 }
717 break;
718 }
719
720 /*
721 * Page table updates are very very unlikely to be crossing page boundaries,
722 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
723 */
724 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
725 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
726 {
727 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
728 return true;
729 }
730
731 /*
732 * Nobody does an unaligned 8 byte write to a page table, right.
733 */
734 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
735 {
736 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
737 return true;
738 }
739
740 return false;
741}
742
743
744/**
745 * Flushes the page being accessed.
746 *
747 * @returns VBox status code suitable for scheduling.
748 * @param pVM The cross context VM structure.
749 * @param pVCpu The cross context virtual CPU structure.
750 * @param pPool The pool.
751 * @param pPage The pool page (head).
752 * @param pDis The disassembly of the write instruction.
753 * @param pRegFrame The trap register frame.
754 * @param GCPhysFault The fault address as guest physical address.
755 * @param pvFault The fault address.
756 * @todo VBOXSTRICTRC
757 */
758static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
759 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
760{
761 NOREF(pVM); NOREF(GCPhysFault);
762
763 /*
764 * First, do the flushing.
765 */
766 pgmPoolMonitorChainFlush(pPool, pPage);
767
768 /*
769 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
770 * Must do this in raw mode (!); XP boot will fail otherwise.
771 */
772 int rc = VINF_SUCCESS;
773 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
774 if (rc2 == VINF_SUCCESS)
775 { /* do nothing */ }
776 else if (rc2 == VINF_EM_RESCHEDULE)
777 {
778 rc = VBOXSTRICTRC_VAL(rc2);
779# ifndef IN_RING3
780 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
781# endif
782 }
783 else if (rc2 == VERR_EM_INTERPRETER)
784 {
785 rc = VINF_EM_RAW_EMULATE_INSTR;
786 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
787 }
788 else if (RT_FAILURE_NP(rc2))
789 rc = VBOXSTRICTRC_VAL(rc2);
790 else
791 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
792
793 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
794 return rc;
795}
796
797
798/**
799 * Handles the STOSD write accesses.
800 *
801 * @returns VBox status code suitable for scheduling.
802 * @param pVM The cross context VM structure.
803 * @param pPool The pool.
804 * @param pPage The pool page (head).
805 * @param pDis The disassembly of the write instruction.
806 * @param pRegFrame The trap register frame.
807 * @param GCPhysFault The fault address as guest physical address.
808 * @param pvFault The fault address.
809 */
810DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
811 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
812{
813 unsigned uIncrement = pDis->Param1.cb;
814 NOREF(pVM);
815
816 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
817 Assert(pRegFrame->rcx <= 0x20);
818
819# ifdef VBOX_STRICT
820 if (pDis->uOpMode == DISCPUMODE_32BIT)
821 Assert(uIncrement == 4);
822 else
823 Assert(uIncrement == 8);
824# endif
825
826 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
827
828 /*
829 * Increment the modification counter and insert it into the list
830 * of modified pages the first time.
831 */
832 if (!pPage->cModifications++)
833 pgmPoolMonitorModifiedInsert(pPool, pPage);
834
835 /*
836 * Execute REP STOSD.
837 *
838 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
839 * write situation, meaning that it's safe to write here.
840 */
841 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
842 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
843 while (pRegFrame->rcx)
844 {
845# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
846 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
847 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
848 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
849# else
850 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
851# endif
852 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
853 pu32 += uIncrement;
854 GCPhysFault += uIncrement;
855 pRegFrame->rdi += uIncrement;
856 pRegFrame->rcx--;
857 }
858 pRegFrame->rip += pDis->cbInstr;
859
860 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
861 return VINF_SUCCESS;
862}
863
864
865/**
866 * Handles the simple write accesses.
867 *
868 * @returns VBox status code suitable for scheduling.
869 * @param pVM The cross context VM structure.
870 * @param pVCpu The cross context virtual CPU structure.
871 * @param pPool The pool.
872 * @param pPage The pool page (head).
873 * @param pDis The disassembly of the write instruction.
874 * @param pRegFrame The trap register frame.
875 * @param GCPhysFault The fault address as guest physical address.
876 * @param pvFault The fault address.
877 * @param pfReused Reused state (in/out)
878 */
879DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
880 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
881{
882 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
883 NOREF(pVM);
884 NOREF(pfReused); /* initialized by caller */
885
886 /*
887 * Increment the modification counter and insert it into the list
888 * of modified pages the first time.
889 */
890 if (!pPage->cModifications++)
891 pgmPoolMonitorModifiedInsert(pPool, pPage);
892
893 /*
894 * Clear all the pages. ASSUMES that pvFault is readable.
895 */
896# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
897 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
898# endif
899
900 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
901 if (cbWrite <= 8)
902 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
903 else if (cbWrite <= 16)
904 {
905 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
906 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
907 }
908 else
909 {
910 Assert(cbWrite <= 32);
911 for (uint32_t off = 0; off < cbWrite; off += 8)
912 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
913 }
914
915# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
916 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
917# endif
918
919 /*
920 * Interpret the instruction.
921 */
922 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
923 if (RT_SUCCESS(rc))
924 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
925 else if (rc == VERR_EM_INTERPRETER)
926 {
927 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
928 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
929 rc = VINF_EM_RAW_EMULATE_INSTR;
930 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
931 }
932
933# if 0 /* experimental code */
934 if (rc == VINF_SUCCESS)
935 {
936 switch (pPage->enmKind)
937 {
938 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
939 {
940 X86PTEPAE GstPte;
941 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
942 AssertRC(rc);
943
944 /* Check the new value written by the guest. If present and with a bogus physical address, then
945 * it's fairly safe to assume the guest is reusing the PT.
946 */
947 if (GstPte.n.u1Present)
948 {
949 RTHCPHYS HCPhys = -1;
950 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
951 if (rc != VINF_SUCCESS)
952 {
953 *pfReused = true;
954 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
955 }
956 }
957 break;
958 }
959 }
960 }
961# endif
962
963 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
964 return VBOXSTRICTRC_VAL(rc);
965}
966
967
968/**
969 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
970 * \#PF access handler callback for page table pages.}
971 *
972 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
973 */
974DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
975 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
976{
977 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
978 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
979 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
980 unsigned cMaxModifications;
981 bool fForcedFlush = false;
982 NOREF(uErrorCode);
983
984 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
985
986 pgmLock(pVM);
987 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
988 {
989 /* Pool page changed while we were waiting for the lock; ignore. */
990 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
991 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
992 pgmUnlock(pVM);
993 return VINF_SUCCESS;
994 }
995# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
996 if (pPage->fDirty)
997 {
998 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
999 pgmUnlock(pVM);
1000 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1001 }
1002# endif
1003
1004# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1005 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1006 {
1007 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1008 void *pvGst;
1009 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1010 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1011 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1012 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1013 }
1014# endif
1015
1016 /*
1017 * Disassemble the faulting instruction.
1018 */
1019 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1020 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1021 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1022 {
1023 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1024 pgmUnlock(pVM);
1025 return rc;
1026 }
1027
1028 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1029
1030 /*
1031 * We should ALWAYS have the list head as user parameter. This
1032 * is because we use that page to record the changes.
1033 */
1034 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1035
1036# ifdef IN_RING0
1037 /* Maximum nr of modifications depends on the page type. */
1038 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1039 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1040 cMaxModifications = 4;
1041 else
1042 cMaxModifications = 24;
1043# else
1044 cMaxModifications = 48;
1045# endif
1046
1047 /*
1048 * Incremental page table updates should weigh more than random ones.
1049 * (Only applies when started from offset 0)
1050 */
1051 pVCpu->pgm.s.cPoolAccessHandler++;
1052 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1053 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1054 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1055 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1056 {
1057 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1058 Assert(pPage->cModifications < 32000);
1059 pPage->cModifications = pPage->cModifications * 2;
1060 pPage->GCPtrLastAccessHandlerFault = pvFault;
1061 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1062 if (pPage->cModifications >= cMaxModifications)
1063 {
1064 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1065 fForcedFlush = true;
1066 }
1067 }
1068
1069 if (pPage->cModifications >= cMaxModifications)
1070 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1071
1072 /*
1073 * Check if it's worth dealing with.
1074 */
1075 bool fReused = false;
1076 bool fNotReusedNotForking = false;
1077 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1078 || pgmPoolIsPageLocked(pPage)
1079 )
1080 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1081 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1082 {
1083 /*
1084 * Simple instructions, no REP prefix.
1085 */
1086 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1087 {
1088 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1089 if (fReused)
1090 goto flushPage;
1091
1092 /* A mov instruction to change the first page table entry will be remembered so we can detect
1093 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1094 */
1095 if ( rc == VINF_SUCCESS
1096 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1097 && pDis->pCurInstr->uOpcode == OP_MOV
1098 && (pvFault & PAGE_OFFSET_MASK) == 0)
1099 {
1100 pPage->GCPtrLastAccessHandlerFault = pvFault;
1101 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1102 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1103 /* Make sure we don't kick out a page too quickly. */
1104 if (pPage->cModifications > 8)
1105 pPage->cModifications = 2;
1106 }
1107 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1108 {
1109 /* ignore the 2nd write to this page table entry. */
1110 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1111 }
1112 else
1113 {
1114 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1115 pPage->GCPtrLastAccessHandlerRip = 0;
1116 }
1117
1118 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1119 pgmUnlock(pVM);
1120 return rc;
1121 }
1122
1123 /*
1124 * Windows frequently does small memset() operations (netio test 4k+).
1125 * We have to deal with these or we'll kill the cache and performance.
1126 */
1127 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1128 && !pRegFrame->eflags.Bits.u1DF
1129 && pDis->uOpMode == pDis->uCpuMode
1130 && pDis->uAddrMode == pDis->uCpuMode)
1131 {
1132 bool fValidStosd = false;
1133
1134 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1135 && pDis->fPrefix == DISPREFIX_REP
1136 && pRegFrame->ecx <= 0x20
1137 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1138 && !((uintptr_t)pvFault & 3)
1139 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1140 )
1141 {
1142 fValidStosd = true;
1143 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1144 }
1145 else
1146 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1147 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1148 && pRegFrame->rcx <= 0x20
1149 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1150 && !((uintptr_t)pvFault & 7)
1151 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1152 )
1153 {
1154 fValidStosd = true;
1155 }
1156
1157 if (fValidStosd)
1158 {
1159 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1160 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1161 pgmUnlock(pVM);
1162 return rc;
1163 }
1164 }
1165
1166 /* REP prefix, don't bother. */
1167 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1168 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1169 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1170 fNotReusedNotForking = true;
1171 }
1172
1173# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1174 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1175 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1176 */
1177 if ( pPage->cModifications >= cMaxModifications
1178 && !fForcedFlush
1179 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1180 && ( fNotReusedNotForking
1181 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1182 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1183 )
1184 )
1185 {
1186 Assert(!pgmPoolIsPageLocked(pPage));
1187 Assert(pPage->fDirty == false);
1188
1189 /* Flush any monitored duplicates as we will disable write protection. */
1190 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1191 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1192 {
1193 PPGMPOOLPAGE pPageHead = pPage;
1194
1195 /* Find the monitor head. */
1196 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1197 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1198
1199 while (pPageHead)
1200 {
1201 unsigned idxNext = pPageHead->iMonitoredNext;
1202
1203 if (pPageHead != pPage)
1204 {
1205 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1206 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1207 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1208 AssertRC(rc2);
1209 }
1210
1211 if (idxNext == NIL_PGMPOOL_IDX)
1212 break;
1213
1214 pPageHead = &pPool->aPages[idxNext];
1215 }
1216 }
1217
1218 /* The flushing above might fail for locked pages, so double check. */
1219 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1220 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1221 {
1222 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1223
1224 /* Temporarily allow write access to the page table again. */
1225 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1226 if (rc == VINF_SUCCESS)
1227 {
1228 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1229 AssertMsg(rc == VINF_SUCCESS
1230 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1231 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1232 || rc == VERR_PAGE_NOT_PRESENT,
1233 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1234# ifdef VBOX_STRICT
1235 pPage->GCPtrDirtyFault = pvFault;
1236# endif
1237
1238 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1239 pgmUnlock(pVM);
1240 return rc;
1241 }
1242 }
1243 }
1244# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1245
1246 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1247flushPage:
1248 /*
1249 * Not worth it, so flush it.
1250 *
1251 * If we considered it to be reused, don't go back to ring-3
1252 * to emulate failed instructions since we usually cannot
1253 * interpret them. This may be a bit risky, in which case
1254 * the reuse detection must be fixed.
1255 */
1256 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1257 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1258 && fReused)
1259 {
1260 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1261 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1262 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1263 }
1264 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1265 pgmUnlock(pVM);
1266 return rc;
1267}
1268
1269#endif /* !IN_RING3 */
1270
1271/**
1272 * @callback_method_impl{FNPGMPHYSHANDLER,
1273 * Access handler for shadowed page table pages.}
1274 *
1275 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1276 */
1277PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1278pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1279 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1280{
1281 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1282 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1283 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1284 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1285 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1286
1287 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1288
1289 pgmLock(pVM);
1290
1291#ifdef VBOX_WITH_STATISTICS
1292 /*
1293 * Collect stats on the access.
1294 */
1295 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1296 if (cbBuf <= 16 && cbBuf > 0)
1297 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1298 else if (cbBuf >= 17 && cbBuf < 32)
1299 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1300 else if (cbBuf >= 32 && cbBuf < 64)
1301 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1302 else if (cbBuf >= 64)
1303 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1304
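 /* cbAlign is effectively an alignment mask: entry size minus one (7 for 8-byte
    PAE/long-mode entries, 3 for 4-byte legacy 32-bit entries). */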
1305 uint8_t cbAlign;
1306 switch (pPage->enmKind)
1307 {
1308 default:
1309 cbAlign = 7;
1310 break;
1311 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1312 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1313 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1314 case PGMPOOLKIND_32BIT_PD:
1315 case PGMPOOLKIND_32BIT_PD_PHYS:
1316 cbAlign = 3;
1317 break;
1318 }
1319 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1320 if ((uint8_t)GCPhys & cbAlign)
1321 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1322#endif
1323
1324 /*
1325 * Make sure the pool page wasn't modified by a different CPU.
1326 */
1327 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1328 {
1329 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1330
1331 /* The max modification count before flushing depends on the context and page type. */
1332#ifdef IN_RING3
1333 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1334#else
1335 uint16_t cMaxModifications;
1336 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1337 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1338 cMaxModifications = 4;
1339 else
1340 cMaxModifications = 24;
1341#endif
1342
1343 /*
1344 * We don't have to be very sophisticated about this since there are relatively few calls here.
1345 * However, we must try our best to detect any non-CPU accesses (disk / networking).
1346 */
1347 if ( ( pPage->cModifications < cMaxModifications
1348 || pgmPoolIsPageLocked(pPage) )
1349 && enmOrigin != PGMACCESSORIGIN_DEVICE
1350 && cbBuf <= 16)
1351 {
1352 /* Clear the shadow entry. */
1353 if (!pPage->cModifications++)
1354 pgmPoolMonitorModifiedInsert(pPool, pPage);
1355
1356 if (cbBuf <= 8)
1357 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1358 else
1359 {
1360 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1361 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1362 }
1363 }
1364 else
1365 pgmPoolMonitorChainFlush(pPool, pPage);
1366
1367 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1368 }
1369 else
1370 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1371 pgmUnlock(pVM);
1372 return VINF_PGM_HANDLER_DO_DEFAULT;
1373}
1374
1375
1376#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1377
1378# if defined(VBOX_STRICT) && !defined(IN_RING3)
1379
1380/**
1381 * Check references to guest physical memory in a PAE / PAE page table.
1382 *
1383 * @param pPool The pool.
1384 * @param pPage The page.
1385 * @param pShwPT The shadow page table (mapping of the page).
1386 * @param pGstPT The guest page table.
1387 */
1388static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1389{
1390 unsigned cErrors = 0;
1391 int LastRc = -1; /* initialized to shut up gcc */
1392 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1393 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1394 PVMCC pVM = pPool->CTX_SUFF(pVM);
1395
1396# ifdef VBOX_STRICT
1397 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1398 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1399# endif
1400 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1401 {
1402 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1403 {
1404 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1405 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1406 if ( rc != VINF_SUCCESS
1407 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1408 {
1409 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1410 LastPTE = i;
1411 LastRc = rc;
1412 LastHCPhys = HCPhys;
1413 cErrors++;
1414
1415 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1416 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1417 AssertRC(rc);
1418
1419 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1420 {
1421 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1422
1423 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1424 {
1425 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1426
1427 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1428 {
1429 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1430 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1431 {
1432 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1433 }
1434 }
1435
1436 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1437 }
1438 }
1439 }
1440 }
1441 }
1442 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1443}
1444
1445
1446/**
1447 * Check references to guest physical memory in a PAE / 32-bit page table.
1448 *
1449 * @param pPool The pool.
1450 * @param pPage The page.
1451 * @param pShwPT The shadow page table (mapping of the page).
1452 * @param pGstPT The guest page table.
1453 */
1454static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1455{
1456 unsigned cErrors = 0;
1457 int LastRc = -1; /* initialized to shut up gcc */
1458 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1459 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1460 PVMCC pVM = pPool->CTX_SUFF(pVM);
1461
1462# ifdef VBOX_STRICT
1463 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1464 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1465# endif
1466 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1467 {
1468 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1469 {
1470 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1471 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1472 if ( rc != VINF_SUCCESS
1473 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1474 {
1475 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1476 LastPTE = i;
1477 LastRc = rc;
1478 LastHCPhys = HCPhys;
1479 cErrors++;
1480
1481 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1482 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1483 AssertRC(rc);
1484
1485 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1486 {
1487 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1488
1489 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1490 {
1491 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1492
1493 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1494 {
1495 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1496 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1497 {
1498 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1499 }
1500 }
1501
1502 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1503 }
1504 }
1505 }
1506 }
1507 }
1508 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1509}
1510
1511# endif /* VBOX_STRICT && !IN_RING3 */
1512
1513/**
1514 * Clear references to guest physical memory in a PAE / PAE page table.
1515 *
1516 * @returns nr of changed PTEs
1517 * @param pPool The pool.
1518 * @param pPage The page.
1519 * @param pShwPT The shadow page table (mapping of the page).
1520 * @param pGstPT The guest page table.
1521 * @param pOldGstPT The old cached guest page table.
1522 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1523 * @param pfFlush Flush reused page table (out)
1524 */
1525DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1526 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1527{
1528 unsigned cChanged = 0;
1529
1530# ifdef VBOX_STRICT
1531 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1532 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1533# endif
1534 *pfFlush = false;
1535
1536 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1537 {
1538 /* Check the new value written by the guest. If present and with a bogus physical address, then
1539 * it's fairly safe to assume the guest is reusing the PT.
1540 */
1541 if ( fAllowRemoval
1542 && pGstPT->a[i].n.u1Present)
1543 {
1544 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1545 {
1546 *pfFlush = true;
1547 return ++cChanged;
1548 }
1549 }
1550 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1551 {
1552 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1553 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1554 {
1555# ifdef VBOX_STRICT
1556 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1557 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1558 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1559# endif
1560 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1561 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1562 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1563 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1564
1565 if ( uHostAttr == uGuestAttr
1566 && fHostRW <= fGuestRW)
1567 continue;
1568 }
1569 cChanged++;
1570 /* Something was changed, so flush it. */
1571 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1572 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1573 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1574 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1575 }
1576 }
1577 return cChanged;
1578}
1579
1580
1581/**
1582 * Clear references to guest physical memory in a PAE / 32-bit page table.
1583 *
1584 * @returns Number of changed PTEs.
1585 * @param pPool The pool.
1586 * @param pPage The page.
1587 * @param pShwPT The shadow page table (mapping of the page).
1588 * @param pGstPT The guest page table.
1589 * @param pOldGstPT The old cached guest page table.
1590 * @param fAllowRemoval Bail out and flag a flush as soon as an invalid guest PTE is encountered (the page table is being reused).
1591 * @param pfFlush Where to return whether the reused page table must be flushed (out).
1592 */
1593DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1594 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1595{
1596 unsigned cChanged = 0;
1597
1598# ifdef VBOX_STRICT
1599 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1600 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1601# endif
1602 *pfFlush = false;
1603
1604 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1605 {
1606 /* Check the new value written by the guest. If present and with a bogus physical address, then
1607 * it's fairly safe to assume the guest is reusing the PT.
1608 */
1609 if ( fAllowRemoval
1610 && pGstPT->a[i].n.u1Present)
1611 {
1612 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1613 {
1614 *pfFlush = true;
1615 return ++cChanged;
1616 }
1617 }
1618 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1619 {
1620 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1621 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1622 {
1623# ifdef VBOX_STRICT
1624                RTHCPHYS HCPhys = NIL_RTHCPHYS;
1625 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1626 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1627# endif
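                /* As in the PAE-on-PAE case above: same target page, so keep the shadow
                   PTE if the attributes still match and no extra write access is granted. */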
1628 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1629 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1630 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1631 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1632
1633 if ( uHostAttr == uGuestAttr
1634 && fHostRW <= fGuestRW)
1635 continue;
1636 }
1637 cChanged++;
1638 /* Something was changed, so flush it. */
1639            Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1640 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1641 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1642 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1643 }
1644 }
1645 return cChanged;
1646}
1647
1648
1649/**
1650 * Flush a dirty page
1651 *
1652 * @param pVM The cross context VM structure.
1653 * @param pPool The pool.
1654 * @param idxSlot Dirty array slot index
1655 * @param fAllowRemoval Allow a reused page table to be removed
1656 */
1657static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1658{
1659 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1660
1661 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1662 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1663 if (idxPage == NIL_PGMPOOL_IDX)
1664 return;
1665
1666 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1667 Assert(pPage->idx == idxPage);
1668 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1669
1670 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1671 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1672
1673# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1674 PVMCPU pVCpu = VMMGetCpu(pVM);
1675 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1676# endif
1677
1678 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1679 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1680 Assert(rc == VINF_SUCCESS);
1681 pPage->fDirty = false;
1682
1683# ifdef VBOX_STRICT
1684 uint64_t fFlags = 0;
1685 RTHCPHYS HCPhys;
1686 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1687 AssertMsg( ( rc == VINF_SUCCESS
1688 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1689 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1690 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1691 || rc == VERR_PAGE_NOT_PRESENT,
1692 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1693# endif
1694
1695 /* Flush those PTEs that have changed. */
1696 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1697 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1698 void *pvGst;
1699 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1700 bool fFlush;
1701 unsigned cChanges;
1702
1703 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1704 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1705 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1706 else
1707 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1708 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1709
1710 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1711 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1712 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1713 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1714
1715 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1716 Assert(pPage->cModifications);
1717 if (cChanges < 4)
1718 pPage->cModifications = 1; /* must use > 0 here */
1719 else
1720 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1721
1722 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
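    /* If the dirty array was full, the slot we're about to release becomes the next free one. */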
1723 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1724 pPool->idxFreeDirtyPage = idxSlot;
1725
1726 pPool->cDirtyPages--;
1727 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1728 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1729 if (fFlush)
1730 {
1731 Assert(fAllowRemoval);
1732 Log(("Flush reused page table!\n"));
1733 pgmPoolFlushPage(pPool, pPage);
1734 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1735 }
1736 else
1737 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1738
1739# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1740 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1741# endif
1742}
1743
1744
1745# ifndef IN_RING3
1746/**
1747 * Add a new dirty page
1748 *
1749 * @param pVM The cross context VM structure.
1750 * @param pPool The pool.
1751 * @param pPage The page.
1752 */
1753void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1754{
1755 PGM_LOCK_ASSERT_OWNER(pVM);
1756 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1757 Assert(!pPage->fDirty);
1758
1759 unsigned idxFree = pPool->idxFreeDirtyPage;
1760 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1761 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1762
1763 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1764 {
1765 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1766 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1767 }
1768 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1769 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1770
1771 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1772
1773 /*
1774 * Make a copy of the guest page table as we require valid GCPhys addresses
1775 * when removing references to physical pages.
1776 * (The HCPhys linear lookup is *extremely* expensive!)
1777 */
1778 void *pvGst;
1779 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
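    /* A PAE-on-PAE page table needs the full 4 KB copied; for the 32-bit guest case only
       half the page is copied, presumably because one PAE shadow PT covers just half of a
       32-bit guest page table. */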
1780 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1781# ifdef VBOX_STRICT
1782 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1783 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1784 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1785 else
1786 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1787 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1788# endif
1789 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1790
1791 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1792 pPage->fDirty = true;
1793 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1794 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1795 pPool->cDirtyPages++;
1796
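    /* Advance the free slot hint (the array size is a power of two, see the AssertCompile
       above); if the next slot is still occupied, fall back to scanning for any free slot. */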
1797 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1798 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1799 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1800 {
1801 unsigned i;
1802 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1803 {
1804 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1805 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1806 {
1807 pPool->idxFreeDirtyPage = idxFree;
1808 break;
1809 }
1810 }
1811 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1812 }
1813
1814 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1815
1816 /*
1817 * Clear all references to this shadow table. See @bugref{7298}.
1818 */
1819 pgmPoolTrackClearPageUsers(pPool, pPage);
1820}
1821# endif /* !IN_RING3 */
1822
1823
1824/**
1825 * Check if the specified page is dirty (not write monitored)
1826 *
1827 * @returns true if dirty, false if not.
1828 * @param pVM The cross context VM structure.
1829 * @param GCPhys Guest physical address
1830 */
1831bool pgmPoolIsDirtyPageSlow(PVM pVM, RTGCPHYS GCPhys)
1832{
1833 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1834 PGM_LOCK_ASSERT_OWNER(pVM);
1835 if (!pPool->cDirtyPages)
1836 return false;
1837
1838 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1839
1840 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1841 {
1842 unsigned idxPage = pPool->aidxDirtyPages[i];
1843 if (idxPage != NIL_PGMPOOL_IDX)
1844 {
1845 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1846 if (pPage->GCPhys == GCPhys)
1847 return true;
1848 }
1849 }
1850 return false;
1851}
1852
1853
1854/**
1855 * Reset all dirty pages by reinstating page monitoring.
1856 *
1857 * @param pVM The cross context VM structure.
1858 */
1859void pgmPoolResetDirtyPages(PVMCC pVM)
1860{
1861 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1862 PGM_LOCK_ASSERT_OWNER(pVM);
1863 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1864
1865 if (!pPool->cDirtyPages)
1866 return;
1867
1868 Log(("pgmPoolResetDirtyPages\n"));
1869 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1870 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1871
1872 pPool->idxFreeDirtyPage = 0;
1873 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1874 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1875 {
1876 unsigned i;
1877 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1878 {
1879 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1880 {
1881 pPool->idxFreeDirtyPage = i;
1882 break;
1883 }
1884 }
1885 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1886 }
1887
1888 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1889 return;
1890}
1891
1892
1893/**
1894 * Invalidate the PT entry for the specified page
1895 *
1896 * @param pVM The cross context VM structure.
1897 * @param GCPtrPage Guest page to invalidate
1898 */
1899void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1900{
1901 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1902 PGM_LOCK_ASSERT_OWNER(pVM);
1903 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1904
1905 if (!pPool->cDirtyPages)
1906 return;
1907
1908 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
1909 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1910 {
1911 /** @todo What was intended here??? This looks incomplete... */
1912 }
1913}
1914
1915
1916/**
1917 * Invalidate (flush) the specified page table if it is currently tracked as dirty, reinstating its write monitoring.
1918 *
1919 * @param pVM The cross context VM structure.
1920 * @param GCPhysPT Physical address of the page table
1921 */
1922void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
1923{
1924 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1925 PGM_LOCK_ASSERT_OWNER(pVM);
1926 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1927 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1928
1929 if (!pPool->cDirtyPages)
1930 return;
1931
1932 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1933
1934 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1935 {
1936 unsigned idxPage = pPool->aidxDirtyPages[i];
1937 if (idxPage != NIL_PGMPOOL_IDX)
1938 {
1939 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1940 if (pPage->GCPhys == GCPhysPT)
1941 {
1942 idxDirtyPage = i;
1943 break;
1944 }
1945 }
1946 }
1947
1948 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1949 {
1950 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1951 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1952 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1953 {
1954 unsigned i;
1955 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1956 {
1957 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1958 {
1959 pPool->idxFreeDirtyPage = i;
1960 break;
1961 }
1962 }
1963 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1964 }
1965 }
1966}
1967
1968#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1969
1970/**
1971 * Inserts a page into the GCPhys hash table.
1972 *
1973 * @param pPool The pool.
1974 * @param pPage The page.
1975 */
1976DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1977{
1978 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1979 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1980 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1981 pPage->iNext = pPool->aiHash[iHash];
1982 pPool->aiHash[iHash] = pPage->idx;
1983}
1984
1985
1986/**
1987 * Removes a page from the GCPhys hash table.
1988 *
1989 * @param pPool The pool.
1990 * @param pPage The page.
1991 */
1992DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1993{
1994 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1995 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1996 if (pPool->aiHash[iHash] == pPage->idx)
1997 pPool->aiHash[iHash] = pPage->iNext;
1998 else
1999 {
2000 uint16_t iPrev = pPool->aiHash[iHash];
2001 for (;;)
2002 {
2003 const int16_t i = pPool->aPages[iPrev].iNext;
2004 if (i == pPage->idx)
2005 {
2006 pPool->aPages[iPrev].iNext = pPage->iNext;
2007 break;
2008 }
2009 if (i == NIL_PGMPOOL_IDX)
2010 {
2011 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2012 break;
2013 }
2014 iPrev = i;
2015 }
2016 }
2017 pPage->iNext = NIL_PGMPOOL_IDX;
2018}
2019
2020
2021/**
2022 * Frees up one cache page.
2023 *
2024 * @returns VBox status code.
2025 * @retval VINF_SUCCESS on success.
2026 * @param pPool The pool.
2027 * @param iUser The user index.
2028 */
2029static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2030{
2031 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2032    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
2033 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2034
2035 /*
2036 * Select one page from the tail of the age list.
2037 */
2038 PPGMPOOLPAGE pPage;
2039 for (unsigned iLoop = 0; ; iLoop++)
2040 {
2041 uint16_t iToFree = pPool->iAgeTail;
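        /* Don't evict the user's own page; step one entry towards the head (younger end)
           of the age list instead. */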
2042 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2043 iToFree = pPool->aPages[iToFree].iAgePrev;
2044/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2045 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2046 {
2047 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2048 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2049 {
2050 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2051 continue;
2052 iToFree = i;
2053 break;
2054 }
2055 }
2056*/
2057 Assert(iToFree != iUser);
2058 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2059 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2060 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2061 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2062 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2063 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2064
2065 pPage = &pPool->aPages[iToFree];
2066
2067 /*
2068 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2069 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2070 */
2071 if ( !pgmPoolIsPageLocked(pPage)
2072 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2073 break;
2074 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2075 pgmPoolCacheUsed(pPool, pPage);
2076 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2077 }
2078
2079 /*
2080 * Found a usable page, flush it and return.
2081 */
2082 int rc = pgmPoolFlushPage(pPool, pPage);
2083 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2084 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2085 if (rc == VINF_SUCCESS)
2086 PGM_INVL_ALL_VCPU_TLBS(pVM);
2087 return rc;
2088}
2089
2090
2091/**
2092 * Checks if a kind mismatch is really a page being reused
2093 * or just a normal remapping.
2094 *
2095 * @returns true if reused and the cached page (enmKind1) should be flushed
2096 * @returns false if not reused.
2097 * @param enmKind1 The kind of the cached page.
2098 * @param enmKind2 The kind of the requested page.
2099 */
2100static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2101{
2102 switch (enmKind1)
2103 {
2104 /*
2105 * Never reuse them. There is no remapping in non-paging mode.
2106 */
2107 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2108 case PGMPOOLKIND_32BIT_PD_PHYS:
2109 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2110 case PGMPOOLKIND_PAE_PD_PHYS:
2111 case PGMPOOLKIND_PAE_PDPT_PHYS:
2112 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2113 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2114 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2115 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2116 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2117 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2118 return false;
2119
2120 /*
2121 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2122 */
2123 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2124 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2125 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2126 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2127 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2128 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2129 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2130 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2131 case PGMPOOLKIND_32BIT_PD:
2132 case PGMPOOLKIND_PAE_PDPT:
2133 switch (enmKind2)
2134 {
2135 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2136 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2137 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2138 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2139 case PGMPOOLKIND_64BIT_PML4:
2140 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2141 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2142 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2143 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2144 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2145 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2146 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2147 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2148 return true;
2149 default:
2150 return false;
2151 }
2152
2153 /*
2154 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2155 */
2156 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2157 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2158 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2159 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2160 case PGMPOOLKIND_64BIT_PML4:
2161 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2162 switch (enmKind2)
2163 {
2164 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2165 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2166 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2167 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2168 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2169 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2170 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2171 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2172 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2173 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2174 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2175 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2176 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2177 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2178 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2179 return true;
2180 default:
2181 return false;
2182 }
2183
2184 /*
2185 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2186 */
2187 case PGMPOOLKIND_ROOT_NESTED:
2188 return false;
2189
2190 default:
2191 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2192 }
2193}
2194
2195
2196/**
2197 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2198 *
2199 * @returns VBox status code.
2200 * @retval VINF_PGM_CACHED_PAGE on success.
2201 * @retval VERR_FILE_NOT_FOUND if not found.
2202 * @param pPool The pool.
2203 * @param GCPhys The GC physical address of the page we're gonna shadow.
2204 * @param enmKind The kind of mapping.
2205 * @param enmAccess Access type for the mapping (only relevant for big pages)
2206 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2207 * @param iUser The shadow page pool index of the user table. This is
2208 * NIL_PGMPOOL_IDX for root pages.
2209 * @param iUserTable The index into the user table (shadowed). Ignored if
2210 * root page
2211 * @param ppPage Where to store the pointer to the page.
2212 */
2213static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2214 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2215{
2216 /*
2217 * Look up the GCPhys in the hash.
2218 */
2219 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2220 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2221 if (i != NIL_PGMPOOL_IDX)
2222 {
2223 do
2224 {
2225 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2226 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2227 if (pPage->GCPhys == GCPhys)
2228 {
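                /* GCPhys matches; it only counts as a cache hit if the kind, access type
                   and A20 gate state match as well. */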
2229 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2230 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2231 && pPage->fA20Enabled == fA20Enabled)
2232 {
2233 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2234 * doesn't flush it in case there are no more free use records.
2235 */
2236 pgmPoolCacheUsed(pPool, pPage);
2237
2238 int rc = VINF_SUCCESS;
2239 if (iUser != NIL_PGMPOOL_IDX)
2240 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2241 if (RT_SUCCESS(rc))
2242 {
2243 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2244 *ppPage = pPage;
2245 if (pPage->cModifications)
2246 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2247 STAM_COUNTER_INC(&pPool->StatCacheHits);
2248 return VINF_PGM_CACHED_PAGE;
2249 }
2250 return rc;
2251 }
2252
2253 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2254 {
2255 /*
2256 * The kind is different. In some cases we should now flush the page
2257 * as it has been reused, but in most cases this is normal remapping
2258 * of PDs as PT or big pages using the GCPhys field in a slightly
2259 * different way than the other kinds.
2260 */
2261 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2262 {
2263 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2264 pgmPoolFlushPage(pPool, pPage);
2265 break;
2266 }
2267 }
2268 }
2269
2270 /* next */
2271 i = pPage->iNext;
2272 } while (i != NIL_PGMPOOL_IDX);
2273 }
2274
2275 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2276 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2277 return VERR_FILE_NOT_FOUND;
2278}
2279
2280
2281/**
2282 * Inserts a page into the cache.
2283 *
2284 * @param pPool The pool.
2285 * @param pPage The cached page.
2286 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2287 */
2288static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2289{
2290 /*
2291 * Insert into the GCPhys hash if the page is fit for that.
2292 */
2293 Assert(!pPage->fCached);
2294 if (fCanBeCached)
2295 {
2296 pPage->fCached = true;
2297 pgmPoolHashInsert(pPool, pPage);
2298 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2299 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2300 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2301 }
2302 else
2303 {
2304 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2305 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2306 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2307 }
2308
2309 /*
2310 * Insert at the head of the age list.
2311 */
2312 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2313 pPage->iAgeNext = pPool->iAgeHead;
2314 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2315 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2316 else
2317 pPool->iAgeTail = pPage->idx;
2318 pPool->iAgeHead = pPage->idx;
2319}
2320
2321
2322/**
2323 * Flushes a cached page.
2324 *
2325 * @param pPool The pool.
2326 * @param pPage The cached page.
2327 */
2328static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2329{
2330 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2331
2332 /*
2333 * Remove the page from the hash.
2334 */
2335 if (pPage->fCached)
2336 {
2337 pPage->fCached = false;
2338 pgmPoolHashRemove(pPool, pPage);
2339 }
2340 else
2341 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2342
2343 /*
2344 * Remove it from the age list.
2345 */
2346 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2347 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2348 else
2349 pPool->iAgeTail = pPage->iAgePrev;
2350 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2351 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2352 else
2353 pPool->iAgeHead = pPage->iAgeNext;
2354 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2355 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2356}
2357
2358
2359/**
2360 * Looks for pages sharing the monitor.
2361 *
2362 * @returns Pointer to the head page.
2363 * @returns NULL if not found.
2364 * @param pPool The pool.
2365 * @param pNewPage The page which is going to be monitored.
2366 */
2367static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2368{
2369 /*
2370 * Look up the GCPhys in the hash.
2371 */
2372 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2373 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2374 if (i == NIL_PGMPOOL_IDX)
2375 return NULL;
2376 do
2377 {
2378 PPGMPOOLPAGE pPage = &pPool->aPages[i];
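        /* The unsigned subtraction below is a compact 'same 4 KB guest page' test;
           GCPhys has already been masked down to a page boundary. */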
2379 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2380 && pPage != pNewPage)
2381 {
2382 switch (pPage->enmKind)
2383 {
2384 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2385 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2386 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2387 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2388 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2389 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2390 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2391 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2392 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2393 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2394 case PGMPOOLKIND_64BIT_PML4:
2395 case PGMPOOLKIND_32BIT_PD:
2396 case PGMPOOLKIND_PAE_PDPT:
2397 {
2398 /* find the head */
2399 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2400 {
2401 Assert(pPage->iMonitoredPrev != pPage->idx);
2402 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2403 }
2404 return pPage;
2405 }
2406
2407 /* ignore, no monitoring. */
2408 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2409 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2410 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2411 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2412 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2413 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2414 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2415 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2416 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2417 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2418 case PGMPOOLKIND_ROOT_NESTED:
2419 case PGMPOOLKIND_PAE_PD_PHYS:
2420 case PGMPOOLKIND_PAE_PDPT_PHYS:
2421 case PGMPOOLKIND_32BIT_PD_PHYS:
2422 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2423 break;
2424 default:
2425 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2426 }
2427 }
2428
2429 /* next */
2430 i = pPage->iNext;
2431 } while (i != NIL_PGMPOOL_IDX);
2432 return NULL;
2433}
2434
2435
2436/**
2437 * Enables write monitoring of a guest page.
2438 *
2439 * @returns VBox status code.
2440 * @retval VINF_SUCCESS on success.
2441 * @param pPool The pool.
2442 * @param pPage The cached page.
2443 */
2444static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2445{
2446 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2447
2448 /*
2449 * Filter out the relevant kinds.
2450 */
2451 switch (pPage->enmKind)
2452 {
2453 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2454 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2455 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2456 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2457 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2458 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2459 case PGMPOOLKIND_64BIT_PML4:
2460 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2461 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2462 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2463 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2464 case PGMPOOLKIND_32BIT_PD:
2465 case PGMPOOLKIND_PAE_PDPT:
2466 break;
2467
2468 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2469 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2470 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2471 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2472 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2473 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2474 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2475 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2476 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2477 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2478 case PGMPOOLKIND_ROOT_NESTED:
2479 /* Nothing to monitor here. */
2480 return VINF_SUCCESS;
2481
2482 case PGMPOOLKIND_32BIT_PD_PHYS:
2483 case PGMPOOLKIND_PAE_PDPT_PHYS:
2484 case PGMPOOLKIND_PAE_PD_PHYS:
2485 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2486 /* Nothing to monitor here. */
2487 return VINF_SUCCESS;
2488 default:
2489 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2490 }
2491
2492 /*
2493 * Install handler.
2494 */
2495 int rc;
2496 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2497 if (pPageHead)
2498 {
2499 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2500 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2501
2502#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2503 if (pPageHead->fDirty)
2504 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2505#endif
2506
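        /* Another shadow page already monitors this guest page: link ourselves into its
           monitoring chain so the existing physical access handler covers this page too. */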
2507 pPage->iMonitoredPrev = pPageHead->idx;
2508 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2509 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2510 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2511 pPageHead->iMonitoredNext = pPage->idx;
2512 rc = VINF_SUCCESS;
2513 }
2514 else
2515 {
2516 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2517 PVMCC pVM = pPool->CTX_SUFF(pVM);
2518 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2519 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2520 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2521 NIL_RTR3PTR /*pszDesc*/);
2522 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2523 * the heap size should suffice. */
2524 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2525 PVMCPU pVCpu = VMMGetCpu(pVM);
2526 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2527 }
2528 pPage->fMonitored = true;
2529 return rc;
2530}
2531
2532
2533/**
2534 * Disables write monitoring of a guest page.
2535 *
2536 * @returns VBox status code.
2537 * @retval VINF_SUCCESS on success.
2538 * @param pPool The pool.
2539 * @param pPage The cached page.
2540 */
2541static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2542{
2543 /*
2544 * Filter out the relevant kinds.
2545 */
2546 switch (pPage->enmKind)
2547 {
2548 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2549 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2550 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2551 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2552 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2553 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2554 case PGMPOOLKIND_64BIT_PML4:
2555 case PGMPOOLKIND_32BIT_PD:
2556 case PGMPOOLKIND_PAE_PDPT:
2557 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2558 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2559 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2560 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2561 break;
2562
2563 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2564 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2565 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2566 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2567 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2568 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2569 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2570 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2571 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2572 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2573 case PGMPOOLKIND_ROOT_NESTED:
2574 case PGMPOOLKIND_PAE_PD_PHYS:
2575 case PGMPOOLKIND_PAE_PDPT_PHYS:
2576 case PGMPOOLKIND_32BIT_PD_PHYS:
2577 /* Nothing to monitor here. */
2578 Assert(!pPage->fMonitored);
2579 return VINF_SUCCESS;
2580
2581 default:
2582 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2583 }
2584 Assert(pPage->fMonitored);
2585
2586 /*
2587 * Remove the page from the monitored list or uninstall it if last.
2588 */
2589 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2590 int rc;
2591 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2592 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2593 {
2594 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2595 {
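            /* We're the head of the monitoring chain: promote the next page to head and
               repoint the physical handler's user arguments at it. */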
2596 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2597 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2598 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2599 MMHyperCCToR3(pVM, pNewHead), MMHyperCCToR0(pVM, pNewHead));
2600
2601 AssertFatalRCSuccess(rc);
2602 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2603 }
2604 else
2605 {
2606 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2607 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2608 {
2609 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2610 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2611 }
2612 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2613 rc = VINF_SUCCESS;
2614 }
2615 }
2616 else
2617 {
2618 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2619 AssertFatalRC(rc);
2620 PVMCPU pVCpu = VMMGetCpu(pVM);
2621 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2622 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2623 }
2624 pPage->fMonitored = false;
2625
2626 /*
2627 * Remove it from the list of modified pages (if in it).
2628 */
2629 pgmPoolMonitorModifiedRemove(pPool, pPage);
2630
2631 return rc;
2632}
2633
2634
2635/**
2636 * Inserts the page into the list of modified pages.
2637 *
2638 * @param pPool The pool.
2639 * @param pPage The page.
2640 */
2641void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2642{
2643 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2644 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2645 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2646 && pPool->iModifiedHead != pPage->idx,
2647 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2648 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2649 pPool->iModifiedHead, pPool->cModifiedPages));
2650
2651 pPage->iModifiedNext = pPool->iModifiedHead;
2652 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2653 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2654 pPool->iModifiedHead = pPage->idx;
2655 pPool->cModifiedPages++;
2656#ifdef VBOX_WITH_STATISTICS
2657 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2658 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2659#endif
2660}
2661
2662
2663/**
2664 * Removes the page from the list of modified pages and resets the
2665 * modification counter.
2666 *
2667 * @param pPool The pool.
2668 * @param pPage The page which is believed to be in the list of modified pages.
2669 */
2670static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2671{
2672 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2673 if (pPool->iModifiedHead == pPage->idx)
2674 {
2675 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2676 pPool->iModifiedHead = pPage->iModifiedNext;
2677 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2678 {
2679 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2680 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2681 }
2682 pPool->cModifiedPages--;
2683 }
2684 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2685 {
2686 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2687 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2688 {
2689 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2690 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2691 }
2692 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2693 pPool->cModifiedPages--;
2694 }
2695 else
2696 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2697 pPage->cModifications = 0;
2698}
2699
2700
2701/**
2702 * Zaps the list of modified pages, resetting their modification counters in the process.
2703 *
2704 * @param pVM The cross context VM structure.
2705 */
2706static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2707{
2708 pgmLock(pVM);
2709 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2710 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2711
2712 unsigned cPages = 0; NOREF(cPages);
2713
2714#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2715 pgmPoolResetDirtyPages(pVM);
2716#endif
2717
2718 uint16_t idx = pPool->iModifiedHead;
2719 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2720 while (idx != NIL_PGMPOOL_IDX)
2721 {
2722 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2723 idx = pPage->iModifiedNext;
2724 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2725 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2726 pPage->cModifications = 0;
2727 Assert(++cPages);
2728 }
2729 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2730 pPool->cModifiedPages = 0;
2731 pgmUnlock(pVM);
2732}
2733
2734
2735/**
2736 * Handle SyncCR3 pool tasks
2737 *
2738 * @returns VBox status code.
2739 * @retval VINF_SUCCESS on success.
2740 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2741 * @param pVCpu The cross context virtual CPU structure.
2742 * @remark Should only be used when monitoring is available, thus placed in
2743 * the PGMPOOL_WITH_MONITORING \#ifdef.
2744 */
2745int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2746{
2747 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2748 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2749
2750 /*
2751 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2752 * Occasionally we will have to clear all the shadow page tables because we wanted
2753 * to monitor a page which was mapped by too many shadowed page tables. This operation
2754 * is sometimes referred to as a 'lightweight flush'.
2755 */
2756# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2757 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2758 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2759# else /* !IN_RING3 */
2760 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2761 {
2762 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2763 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2764
2765 /* Make sure all other VCPUs return to ring 3. */
2766 if (pVM->cCpus > 1)
2767 {
2768 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2769 PGM_INVL_ALL_VCPU_TLBS(pVM);
2770 }
2771 return VINF_PGM_SYNC_CR3;
2772 }
2773# endif /* !IN_RING3 */
2774 else
2775 {
2776 pgmPoolMonitorModifiedClearAll(pVM);
2777
2778 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2779 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2780 {
2781 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2782 return pgmPoolSyncCR3(pVCpu);
2783 }
2784 }
2785 return VINF_SUCCESS;
2786}
2787
2788
2789/**
2790 * Frees up at least one user entry.
2791 *
2792 * @returns VBox status code.
2793 * @retval VINF_SUCCESS on success.
2794 *
2795 * @param pPool The pool.
2796 * @param iUser The user index.
2797 */
2798static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2799{
2800 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2801 /*
2802 * Just free cached pages in a braindead fashion.
2803 */
2804 /** @todo walk the age list backwards and free the first with usage. */
2805 int rc = VINF_SUCCESS;
2806 do
2807 {
2808 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2809 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2810 rc = rc2;
2811 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2812 return rc;
2813}
2814
2815
2816/**
2817 * Inserts a page into the cache.
2818 *
2819 * This will create a user node for the page, insert it into the GCPhys
2820 * hash, and insert it into the age list.
2821 *
2822 * @returns VBox status code.
2823 * @retval VINF_SUCCESS if successfully added.
2824 *
2825 * @param pPool The pool.
2826 * @param pPage The cached page.
2827 * @param GCPhys The GC physical address of the page we're gonna shadow.
2828 * @param iUser The user index.
2829 * @param iUserTable The user table index.
2830 */
2831DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2832{
2833 int rc = VINF_SUCCESS;
2834 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2835
2836 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2837
2838 if (iUser != NIL_PGMPOOL_IDX)
2839 {
2840#ifdef VBOX_STRICT
2841 /*
2842     * Check that the entry doesn't already exist.
2843 */
2844 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2845 {
2846 uint16_t i = pPage->iUserHead;
2847 do
2848 {
2849 Assert(i < pPool->cMaxUsers);
2850 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2851 i = paUsers[i].iNext;
2852 } while (i != NIL_PGMPOOL_USER_INDEX);
2853 }
2854#endif
2855
2856 /*
2857     * Find a free user node.
2858 */
2859 uint16_t i = pPool->iUserFreeHead;
2860 if (i == NIL_PGMPOOL_USER_INDEX)
2861 {
2862 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2863 if (RT_FAILURE(rc))
2864 return rc;
2865 i = pPool->iUserFreeHead;
2866 }
2867
2868 /*
2869 * Unlink the user node from the free list,
2870 * initialize and insert it into the user list.
2871 */
2872 pPool->iUserFreeHead = paUsers[i].iNext;
2873 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2874 paUsers[i].iUser = iUser;
2875 paUsers[i].iUserTable = iUserTable;
2876 pPage->iUserHead = i;
2877 }
2878 else
2879 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2880
2881
2882 /*
2883 * Insert into cache and enable monitoring of the guest page if enabled.
2884 *
2885 * Until we implement caching of all levels, including the CR3 one, we'll
2886     * have to make sure we don't try to monitor & cache any recursive reuse of
2887     * a monitored CR3 page. Because all Windows versions do this, we'll
2888 * have to be able to do combined access monitoring, CR3 + PT and
2889 * PD + PT (guest PAE).
2890 *
2891 * Update:
2892     * We're now cooperating with the CR3 monitor if an uncacheable page is found.
2893 */
2894 const bool fCanBeMonitored = true;
2895 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2896 if (fCanBeMonitored)
2897 {
2898 rc = pgmPoolMonitorInsert(pPool, pPage);
2899 AssertRC(rc);
2900 }
2901 return rc;
2902}
2903
2904
2905/**
2906 * Adds a user reference to a page.
2907 *
2908 * This will move the page to the head of the age list.
2909 *
2910 * @returns VBox status code.
2911 * @retval VINF_SUCCESS if successfully added.
2912 *
2913 * @param pPool The pool.
2914 * @param pPage The cached page.
2915 * @param iUser The user index.
2916 * @param iUserTable The user table.
2917 */
2918static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2919{
2920 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2921 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2922 Assert(iUser != NIL_PGMPOOL_IDX);
2923
2924# ifdef VBOX_STRICT
2925 /*
2926     * Check that the entry doesn't already exist. We only allow multiple
2927 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2928 */
2929 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2930 {
2931 uint16_t i = pPage->iUserHead;
2932 do
2933 {
2934 Assert(i < pPool->cMaxUsers);
2935 /** @todo this assertion looks odd... Shouldn't it be && here? */
2936 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2937 i = paUsers[i].iNext;
2938 } while (i != NIL_PGMPOOL_USER_INDEX);
2939 }
2940# endif
2941
2942 /*
2943 * Allocate a user node.
2944 */
2945 uint16_t i = pPool->iUserFreeHead;
2946 if (i == NIL_PGMPOOL_USER_INDEX)
2947 {
2948 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2949 if (RT_FAILURE(rc))
2950 return rc;
2951 i = pPool->iUserFreeHead;
2952 }
2953 pPool->iUserFreeHead = paUsers[i].iNext;
2954
2955 /*
2956 * Initialize the user node and insert it.
2957 */
2958 paUsers[i].iNext = pPage->iUserHead;
2959 paUsers[i].iUser = iUser;
2960 paUsers[i].iUserTable = iUserTable;
2961 pPage->iUserHead = i;
2962
2963# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2964 if (pPage->fDirty)
2965 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2966# endif
2967
2968 /*
2969 * Tell the cache to update its replacement stats for this page.
2970 */
2971 pgmPoolCacheUsed(pPool, pPage);
2972 return VINF_SUCCESS;
2973}
2974
2975
2976/**
2977 * Frees a user record associated with a page.
2978 *
2979 * This does not clear the entry in the user table, it simply returns the
2980 * user record to the chain of free records.
2981 *
2982 * @param pPool The pool.
2983 * @param pPage The shadow page.
2984 * @param iUser The shadow page pool index of the user table.
2985 * @param iUserTable The index into the user table (shadowed).
2986 *
2987 * @remarks Don't call this for root pages.
2988 */
2989static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2990{
2991 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2992 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2993 Assert(iUser != NIL_PGMPOOL_IDX);
2994
2995 /*
2996 * Unlink and free the specified user entry.
2997 */
2998
2999 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3000 uint16_t i = pPage->iUserHead;
3001 if ( i != NIL_PGMPOOL_USER_INDEX
3002 && paUsers[i].iUser == iUser
3003 && paUsers[i].iUserTable == iUserTable)
3004 {
3005 pPage->iUserHead = paUsers[i].iNext;
3006
3007 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3008 paUsers[i].iNext = pPool->iUserFreeHead;
3009 pPool->iUserFreeHead = i;
3010 return;
3011 }
3012
3013 /* General: Linear search. */
3014 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3015 while (i != NIL_PGMPOOL_USER_INDEX)
3016 {
3017 if ( paUsers[i].iUser == iUser
3018 && paUsers[i].iUserTable == iUserTable)
3019 {
3020 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3021 paUsers[iPrev].iNext = paUsers[i].iNext;
3022 else
3023 pPage->iUserHead = paUsers[i].iNext;
3024
3025 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3026 paUsers[i].iNext = pPool->iUserFreeHead;
3027 pPool->iUserFreeHead = i;
3028 return;
3029 }
3030 iPrev = i;
3031 i = paUsers[i].iNext;
3032 }
3033
3034 /* Fatal: didn't find it */
3035 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3036 iUser, iUserTable, pPage->GCPhys));
3037}
3038
3039
3040#if 0 /* unused */
3041/**
3042 * Gets the entry size of a shadow table.
3043 *
3044 * @param enmKind The kind of page.
3045 *
3046 * @returns The size of the entry in bytes. That is, 4 or 8.
3047 * @returns If the kind is not for a table, an assertion is raised and 0 is
3048 * returned.
3049 */
3050DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3051{
3052 switch (enmKind)
3053 {
3054 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3055 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3056 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3057 case PGMPOOLKIND_32BIT_PD:
3058 case PGMPOOLKIND_32BIT_PD_PHYS:
3059 return 4;
3060
3061 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3062 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3063 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3064 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3065 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3066 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3067 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3068 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3069 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3070 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3071 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3072 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3073 case PGMPOOLKIND_64BIT_PML4:
3074 case PGMPOOLKIND_PAE_PDPT:
3075 case PGMPOOLKIND_ROOT_NESTED:
3076 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3077 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3078 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3079 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3080 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3081 case PGMPOOLKIND_PAE_PD_PHYS:
3082 case PGMPOOLKIND_PAE_PDPT_PHYS:
3083 return 8;
3084
3085 default:
3086 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3087 }
3088}
3089#endif /* unused */
3090
3091#if 0 /* unused */
3092/**
3093 * Gets the entry size of a guest table.
3094 *
3095 * @param enmKind The kind of page.
3096 *
3097 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3098 * @returns If the kind is not for a table, an assertion is raised and 0 is
3099 * returned.
3100 */
3101DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3102{
3103 switch (enmKind)
3104 {
3105 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3106 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3107 case PGMPOOLKIND_32BIT_PD:
3108 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3109 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3110 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3111 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3112 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3113 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3114 return 4;
3115
3116 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3117 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3118 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3119 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3120 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3121 case PGMPOOLKIND_64BIT_PML4:
3122 case PGMPOOLKIND_PAE_PDPT:
3123 return 8;
3124
3125 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3126 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3127 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3128 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3129 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3130 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3131 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3132 case PGMPOOLKIND_ROOT_NESTED:
3133 case PGMPOOLKIND_PAE_PD_PHYS:
3134 case PGMPOOLKIND_PAE_PDPT_PHYS:
3135 case PGMPOOLKIND_32BIT_PD_PHYS:
3136 /** @todo can we return 0? (nobody is calling this...) */
3137 AssertFailed();
3138 return 0;
3139
3140 default:
3141 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3142 }
3143}
3144#endif /* unused */
3145
3146
3147/**
3148 * Checks one shadow page table entry for a mapping of a physical page.
3149 *
3150 * @returns true / false indicating removal of all relevant PTEs
3151 *
3152 * @param pVM The cross context VM structure.
3153 * @param pPhysPage The guest page in question.
3154 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3155 * @param iShw The shadow page table.
3156 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3157 */
3158static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3159{
3160 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3161 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3162 bool fRet = false;
3163
3164 /*
3165 * Assert sanity.
3166 */
3167 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3168 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3169 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3170
3171 /*
3172 * Then, clear the actual mappings to the page in the shadow PT.
3173 */
3174 switch (pPage->enmKind)
3175 {
3176 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3177 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3178 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3179 {
3180 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3181 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3182 uint32_t u32AndMask = 0;
3183 uint32_t u32OrMask = 0;
3184
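            /* If the caller permits it, keep the PTE and only adjust its access: restore RW
               when no write handler is active, or strip RW while writes are monitored.
               Any other handler state means the entry ends up being zapped below. */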
3185 if (!fFlushPTEs)
3186 {
3187 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3188 {
3189 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3190 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3191 u32OrMask = X86_PTE_RW;
3192 u32AndMask = UINT32_MAX;
3193 fRet = true;
3194 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3195 break;
3196
3197 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3198 u32OrMask = 0;
3199 u32AndMask = ~X86_PTE_RW;
3200 fRet = true;
3201 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3202 break;
3203 default:
3204 /* (shouldn't be here, will assert below) */
3205 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3206 break;
3207 }
3208 }
3209 else
3210 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3211
3212 /* Update the counter if we're removing references. */
3213 if (!u32AndMask)
3214 {
3215 Assert(pPage->cPresent);
3216 Assert(pPool->cPresent);
3217 pPage->cPresent--;
3218 pPool->cPresent--;
3219 }
3220
3221 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3222 {
3223 X86PTE Pte;
3224
3225 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3226 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3227 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3228 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3229
3230 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3231 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3232 return fRet;
3233 }
3234#ifdef LOG_ENABLED
3235 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3236 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3237 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3238 {
3239 Log(("i=%d cFound=%d\n", i, ++cFound));
3240 }
3241#endif
3242 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3243 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3244 break;
3245 }
3246
3247 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3248 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3249 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3250 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3251 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3252 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3253 {
3254 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3255 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3256 uint64_t u64OrMask = 0;
3257 uint64_t u64AndMask = 0;
3258
3259 if (!fFlushPTEs)
3260 {
3261 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3262 {
3263 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3264 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3265 u64OrMask = X86_PTE_RW;
3266 u64AndMask = UINT64_MAX;
3267 fRet = true;
3268 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3269 break;
3270
3271 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3272 u64OrMask = 0;
3273 u64AndMask = ~(uint64_t)X86_PTE_RW;
3274 fRet = true;
3275 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3276 break;
3277
3278 default:
3279 /* (shouldn't be here, will assert below) */
3280 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3281 break;
3282 }
3283 }
3284 else
3285 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3286
3287 /* Update the counter if we're removing references. */
3288 if (!u64AndMask)
3289 {
3290 Assert(pPage->cPresent);
3291 Assert(pPool->cPresent);
3292 pPage->cPresent--;
3293 pPool->cPresent--;
3294 }
3295
3296 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3297 {
3298 X86PTEPAE Pte;
3299
3300 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3301 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3302 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3303 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3304
3305 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3306 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3307 return fRet;
3308 }
3309#ifdef LOG_ENABLED
3310 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3311 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3312 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3313 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3314 Log(("i=%d cFound=%d\n", i, ++cFound));
3315#endif
3316 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3317 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3318 break;
3319 }
3320
3321#ifdef PGM_WITH_LARGE_PAGES
3322 /* Large page case only. */
3323 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3324 {
3325 Assert(pVM->pgm.s.fNestedPaging);
3326
3327 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3328 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3329
3330 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3331 {
3332 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3333 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3334 pPD->a[iPte].u = 0;
3335 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3336
3337 /* Update the counter as we're removing references. */
3338 Assert(pPage->cPresent);
3339 Assert(pPool->cPresent);
3340 pPage->cPresent--;
3341 pPool->cPresent--;
3342
3343 return fRet;
3344 }
3345# ifdef LOG_ENABLED
3346 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3347 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3348 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3349 Log(("i=%d cFound=%d\n", i, ++cFound));
3350# endif
3351 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3352 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3353 break;
3354 }
3355
3356 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3357 case PGMPOOLKIND_PAE_PD_PHYS:
3358 {
3359 Assert(pVM->pgm.s.fNestedPaging);
3360
3361 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3362 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3363
3364 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3365 {
3366 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3367 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3368 pPD->a[iPte].u = 0;
3369 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3370
3371 /* Update the counter as we're removing references. */
3372 Assert(pPage->cPresent);
3373 Assert(pPool->cPresent);
3374 pPage->cPresent--;
3375 pPool->cPresent--;
3376 return fRet;
3377 }
3378# ifdef LOG_ENABLED
3379 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3380 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3381 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3382 Log(("i=%d cFound=%d\n", i, ++cFound));
3383# endif
3384 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3385 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3386 break;
3387 }
3388#endif /* PGM_WITH_LARGE_PAGES */
3389
3390 default:
3391 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3392 }
3393
3394 /* not reached. */
3395#ifndef _MSC_VER
3396 return fRet;
3397#endif
3398}
3399
3400
3401/**
3402 * Scans one shadow page table for mappings of a physical page.
3403 *
3404 * @param pVM The cross context VM structure.
3405 * @param pPhysPage The guest page in question.
3406 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3407 * @param iShw The index of the shadow page table.
3408 */
3409static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3410{
3411 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3412
3413    /* We should only come here when there's only one reference to this physical page. */
3414 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3415
3416 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3417 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3418 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3419 if (!fKeptPTEs)
3420 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3421 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3422}
3423
3424
3425/**
3426 * Flushes a list of shadow page tables mapping the same physical page.
3427 *
3428 * @param pVM The cross context VM structure.
3429 * @param pPhysPage The guest page in question.
3430 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3431 * @param iPhysExt The physical cross reference extent list to flush.
3432 */
3433static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3434{
3435 PGM_LOCK_ASSERT_OWNER(pVM);
3436 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3437 bool fKeepList = false;
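    /* Set when any of the calls below keeps (rather than clears) a PTE; in that case the
       extent list must be preserved. */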
3438
3439 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3440 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3441
3442 const uint16_t iPhysExtStart = iPhysExt;
3443 PPGMPOOLPHYSEXT pPhysExt;
3444 do
3445 {
3446 Assert(iPhysExt < pPool->cMaxPhysExts);
3447 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3448 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3449 {
3450 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3451 {
3452 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3453 if (!fKeptPTEs)
3454 {
3455 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3456 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3457 }
3458 else
3459 fKeepList = true;
3460 }
3461 }
3462 /* next */
3463 iPhysExt = pPhysExt->iNext;
3464 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3465
3466 if (!fKeepList)
3467 {
3468 /* insert the list into the free list and clear the ram range entry. */
3469 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3470 pPool->iPhysExtFreeHead = iPhysExtStart;
3471 /* Invalidate the tracking data. */
3472 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3473 }
3474
3475 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3476}
3477
3478
3479/**
3480 * Flushes all shadow page table mappings of the given guest page.
3481 *
3482 * This is typically called when the host page backing the guest one has been
3483 * replaced or when the page protection was changed due to a guest access
3484 * caught by the monitoring.
3485 *
3486 * @returns VBox status code.
3487 * @retval VINF_SUCCESS if all references have been successfully cleared.
3488 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3489 * pool cleaning. FF and sync flags are set.
3490 *
3491 * @param pVM The cross context VM structure.
3492 * @param GCPhysPage GC physical address of the page in question
3493 * @param pPhysPage The guest page in question.
3494 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3495 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3496 * flushed, it is NOT touched if this isn't necessary.
3497 * The caller MUST initialize this to @a false.
3498 */
3499int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3500{
3501 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3502 pgmLock(pVM);
3503 int rc = VINF_SUCCESS;
3504
3505#ifdef PGM_WITH_LARGE_PAGES
3506 /* Is this page part of a large page? */
3507 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3508 {
3509 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3510 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3511
3512 /* Fetch the large page base. */
3513 PPGMPAGE pLargePage;
3514 if (GCPhysBase != GCPhysPage)
3515 {
3516 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3517 AssertFatal(pLargePage);
3518 }
3519 else
3520 pLargePage = pPhysPage;
3521
3522 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3523
3524 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3525 {
3526 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3527 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3528 pVM->pgm.s.cLargePagesDisabled++;
3529
3530 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3531 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3532
3533 *pfFlushTLBs = true;
3534 pgmUnlock(pVM);
3535 return rc;
3536 }
3537 }
3538#else
3539 NOREF(GCPhysPage);
3540#endif /* PGM_WITH_LARGE_PAGES */
3541
3542 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3543 if (u16)
3544 {
3545 /*
3546 * The zero page is currently screwing up the tracking and we'll
3547 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3548 * is defined, zero pages won't normally be mapped. Some kind of solution
3549 * will be needed for this problem of course, but it will have to wait...
3550 */
3551 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3552 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3553 rc = VINF_PGM_GCPHYS_ALIASED;
3554 else
3555 {
3556# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 /** @todo we can drop this now. */
3557 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3558 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3559 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3560# endif
3561
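            /* Dispatch on the tracking data: a plain reference count (of one) means a single
               shadow page table holds the mapping, the PHYSEXT marker means one or more extent
               records list the page tables, and the overflowed marker means we lost track and
               must fall back to the slow full scan. */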
3562 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3563 {
3564 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3565 pgmPoolTrackFlushGCPhysPT(pVM,
3566 pPhysPage,
3567 fFlushPTEs,
3568 PGMPOOL_TD_GET_IDX(u16));
3569 }
3570 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3571 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3572 else
3573 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3574 *pfFlushTLBs = true;
3575
3576# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3577 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3578# endif
3579 }
3580 }
3581
3582 if (rc == VINF_PGM_GCPHYS_ALIASED)
3583 {
3584 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3585 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3586 rc = VINF_PGM_SYNC_CR3;
3587 }
3588 pgmUnlock(pVM);
3589 return rc;
3590}
3591
3592
3593/**
3594 * Scans all shadow page tables for mappings of a physical page.
3595 *
3596 * This may be slow, but it's most likely more efficient than cleaning
3597 * out the entire page pool / cache.
3598 *
3599 * @returns VBox status code.
3600 * @retval VINF_SUCCESS if all references have been successfully cleared.
3601 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3602 * a page pool cleaning.
3603 *
3604 * @param pVM The cross context VM structure.
3605 * @param pPhysPage The guest page in question.
3606 */
3607int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3608{
3609 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3610 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3611 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3612 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3613
3614 /*
3615 * There is a limit to what makes sense.
3616 */
3617 if ( pPool->cPresent > 1024
3618 && pVM->cCpus == 1)
3619 {
3620 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3621 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3622 return VINF_PGM_GCPHYS_ALIASED;
3623 }
3624
3625 /*
3626     * Iterate all the pages until we've encountered all those in use.
3627     * This is a simple but not quite optimal solution.
3628 */
3629 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3630 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3631 unsigned cLeft = pPool->cUsedPages;
3632 unsigned iPage = pPool->cCurPages;
3633 while (--iPage >= PGMPOOL_IDX_FIRST)
3634 {
3635 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3636 if ( pPage->GCPhys != NIL_RTGCPHYS
3637 && pPage->cPresent)
3638 {
3639 switch (pPage->enmKind)
3640 {
3641 /*
3642 * We only care about shadow page tables.
3643 */
3644 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3645 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3646 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3647 {
3648 unsigned cPresent = pPage->cPresent;
3649 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3650 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3651 if (pPT->a[i].n.u1Present)
3652 {
3653 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3654 {
3655 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3656 pPT->a[i].u = 0;
3657
3658 /* Update the counter as we're removing references. */
3659 Assert(pPage->cPresent);
3660 Assert(pPool->cPresent);
3661 pPage->cPresent--;
3662 pPool->cPresent--;
3663 }
3664 if (!--cPresent)
3665 break;
3666 }
3667 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3668 break;
3669 }
3670
3671 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3672 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3673 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3674 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3675 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3676 {
3677 unsigned cPresent = pPage->cPresent;
3678 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3679 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3680 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3681 {
3682 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3683 {
3684 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3685 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3686
3687 /* Update the counter as we're removing references. */
3688 Assert(pPage->cPresent);
3689 Assert(pPool->cPresent);
3690 pPage->cPresent--;
3691 pPool->cPresent--;
3692 }
3693 if (!--cPresent)
3694 break;
3695 }
3696 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3697 break;
3698 }
3699
3700 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3701 {
3702 unsigned cPresent = pPage->cPresent;
3703 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3704 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3705 if (pPT->a[i].n.u1Present)
3706 {
3707 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3708 {
3709 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3710 pPT->a[i].u = 0;
3711
3712 /* Update the counter as we're removing references. */
3713 Assert(pPage->cPresent);
3714 Assert(pPool->cPresent);
3715 pPage->cPresent--;
3716 pPool->cPresent--;
3717 }
3718 if (!--cPresent)
3719 break;
3720 }
3721 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3722 break;
3723 }
3724 }
3725
3726 if (!--cLeft)
3727 break;
3728 }
3729 }
3730
3731 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3732 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3733
3734 /*
3735 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3736 */
3737 if (pPool->cPresent > 1024)
3738 {
3739 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3740 return VINF_PGM_GCPHYS_ALIASED;
3741 }
3742
3743 return VINF_SUCCESS;
3744}
3745
3746
3747/**
3748 * Clears the user entry in a user table.
3749 *
3750 * This is used to remove all references to a page when flushing it.
3751 */
3752static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3753{
3754 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3755 Assert(pUser->iUser < pPool->cCurPages);
3756 uint32_t iUserTable = pUser->iUserTable;
3757
3758 /*
3759 * Map the user page. Ignore references made by fictitious pages.
3760 */
3761 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3762 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3763 union
3764 {
3765 uint64_t *pau64;
3766 uint32_t *pau32;
3767 } u;
3768 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3769 {
3770 Assert(!pUserPage->pvPageR3);
3771 return;
3772 }
3773 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3774
3775
3776 /* Safety precaution in case we change the paging for other modes too in the future. */
3777 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3778
3779#ifdef VBOX_STRICT
3780 /*
3781 * Some sanity checks.
3782 */
3783 switch (pUserPage->enmKind)
3784 {
3785 case PGMPOOLKIND_32BIT_PD:
3786 case PGMPOOLKIND_32BIT_PD_PHYS:
3787 Assert(iUserTable < X86_PG_ENTRIES);
3788 break;
3789 case PGMPOOLKIND_PAE_PDPT:
3790 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3791 case PGMPOOLKIND_PAE_PDPT_PHYS:
3792 Assert(iUserTable < 4);
3793 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3794 break;
3795 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3796 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3797 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3798 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3799 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3800 case PGMPOOLKIND_PAE_PD_PHYS:
3801 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3802 break;
3803 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3804 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3805 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3806 break;
3807 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3808 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3809 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3810 break;
3811 case PGMPOOLKIND_64BIT_PML4:
3812 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3813 /* GCPhys >> PAGE_SHIFT is the index here */
3814 break;
3815 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3816 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3817 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3818 break;
3819
3820 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3821 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3822 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3823 break;
3824
3825 case PGMPOOLKIND_ROOT_NESTED:
3826 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3827 break;
3828
3829 default:
3830 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3831 break;
3832 }
3833#endif /* VBOX_STRICT */
3834
3835 /*
3836 * Clear the entry in the user page.
3837 */
3838 switch (pUserPage->enmKind)
3839 {
3840 /* 32-bit entries */
3841 case PGMPOOLKIND_32BIT_PD:
3842 case PGMPOOLKIND_32BIT_PD_PHYS:
3843 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3844 break;
3845
3846 /* 64-bit entries */
3847 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3848 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3849 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3850 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3851 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3852 case PGMPOOLKIND_PAE_PD_PHYS:
3853 case PGMPOOLKIND_PAE_PDPT_PHYS:
3854 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3855 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3856 case PGMPOOLKIND_64BIT_PML4:
3857 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3858 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3859 case PGMPOOLKIND_PAE_PDPT:
3860 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3861 case PGMPOOLKIND_ROOT_NESTED:
3862 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3863 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3864 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3865 break;
3866
3867 default:
3868 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3869 }
3870 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3871}
3872
3873
3874/**
3875 * Clears all users of a page.
3876 */
3877static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3878{
3879 /*
3880 * Free all the user records.
3881 */
3882 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3883
3884 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3885 uint16_t i = pPage->iUserHead;
3886 while (i != NIL_PGMPOOL_USER_INDEX)
3887 {
3888        /* Clear the entry in the user table. */
3889 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3890
3891 /* Free it. */
3892 const uint16_t iNext = paUsers[i].iNext;
3893 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3894 paUsers[i].iNext = pPool->iUserFreeHead;
3895 pPool->iUserFreeHead = i;
3896
3897 /* Next. */
3898 i = iNext;
3899 }
3900 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3901}
3902
3903
3904/**
3905 * Allocates a new physical cross reference extent.
3906 *
3907 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3908 * @param pVM The cross context VM structure.
3909 * @param piPhysExt Where to store the phys ext index.
3910 */
3911PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3912{
3913 PGM_LOCK_ASSERT_OWNER(pVM);
3914 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3915 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3916 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3917 {
3918 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3919 return NULL;
3920 }
3921 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3922 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3923 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3924 *piPhysExt = iPhysExt;
3925 return pPhysExt;
3926}
3927
3928
3929/**
3930 * Frees a physical cross reference extent.
3931 *
3932 * @param pVM The cross context VM structure.
3933 * @param iPhysExt The extent to free.
3934 */
3935void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3936{
3937 PGM_LOCK_ASSERT_OWNER(pVM);
3938 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3939 Assert(iPhysExt < pPool->cMaxPhysExts);
3940 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3941 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3942 {
3943 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3944 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3945 }
3946 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3947 pPool->iPhysExtFreeHead = iPhysExt;
3948}
3949
3950
3951/**
3952 * Frees a list of physical cross reference extents.
3953 *
3954 * @param pVM The cross context VM structure.
3955 * @param iPhysExt The index of the head of the extent list to free.
3956 */
3957void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3958{
3959 PGM_LOCK_ASSERT_OWNER(pVM);
3960 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3961
3962 const uint16_t iPhysExtStart = iPhysExt;
3963 PPGMPOOLPHYSEXT pPhysExt;
3964 do
3965 {
3966 Assert(iPhysExt < pPool->cMaxPhysExts);
3967 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3968 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3969 {
3970 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3971 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3972 }
3973
3974 /* next */
3975 iPhysExt = pPhysExt->iNext;
3976 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3977
3978 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3979 pPool->iPhysExtFreeHead = iPhysExtStart;
3980}
3981
3982
3983/**
3984 * Insert a reference into a list of physical cross reference extents.
3985 *
3986 * @returns The new tracking data for PGMPAGE.
3987 *
3988 * @param pVM The cross context VM structure.
3989 * @param iPhysExt The physical extent index of the list head.
3990 * @param iShwPT The shadow page table index.
3991 * @param iPte Page table entry
3992 *
3993 */
3994static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3995{
3996 PGM_LOCK_ASSERT_OWNER(pVM);
3997 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3998 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3999
4000 /*
4001 * Special common cases.
4002 */
4003 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4004 {
4005 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4006 paPhysExts[iPhysExt].apte[1] = iPte;
4007 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4008 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4009 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4010 }
4011 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4012 {
4013 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4014 paPhysExts[iPhysExt].apte[2] = iPte;
4015 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4016 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4017 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4018 }
4019 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4020
4021 /*
4022 * General treatment.
4023 */
4024 const uint16_t iPhysExtStart = iPhysExt;
4025 unsigned cMax = 15;
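    /* Walk the existing extent list looking for a free slot. Each extent holds three entries
       and we give up after 15 extents, falling back to the 'overflowed' marker which forces
       the slow flush path later on. */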
4026 for (;;)
4027 {
4028 Assert(iPhysExt < pPool->cMaxPhysExts);
4029 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4030 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4031 {
4032 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4033 paPhysExts[iPhysExt].apte[i] = iPte;
4034 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4035 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4036 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4037 }
4038 if (!--cMax)
4039 {
4040 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4041 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4042 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4043 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4044 }
4045
4046 /* advance */
4047 iPhysExt = paPhysExts[iPhysExt].iNext;
4048 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4049 break;
4050 }
4051
4052 /*
4053 * Add another extent to the list.
4054 */
4055 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4056 if (!pNew)
4057 {
4058 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4059 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4060 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4061 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4062 }
4063 pNew->iNext = iPhysExtStart;
4064 pNew->aidx[0] = iShwPT;
4065 pNew->apte[0] = iPte;
4066 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4067 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4068}
4069
4070
4071/**
4072 * Add a reference to a guest physical page where extents are in use.
4073 *
4074 * @returns The new tracking data for PGMPAGE.
4075 *
4076 * @param pVM The cross context VM structure.
4077 * @param pPhysPage Pointer to the aPages entry in the ram range.
4078 * @param u16 The ram range flags (top 16-bits).
4079 * @param iShwPT The shadow page table index.
4080 * @param iPte Page table entry
4081 */
4082uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4083{
4084 pgmLock(pVM);
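    /* u16 is the page's tracking word: a small reference count together with either a shadow
       page table index (for ordinary counts) or, once the count field equals
       PGMPOOL_TD_CREFS_PHYSEXT, the index of the first physical cross reference extent. */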
4085 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4086 {
4087 /*
4088 * Convert to extent list.
4089 */
4090 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4091 uint16_t iPhysExt;
4092 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4093 if (pPhysExt)
4094 {
4095 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4096 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4097 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4098 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4099 pPhysExt->aidx[1] = iShwPT;
4100 pPhysExt->apte[1] = iPte;
4101 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4102 }
4103 else
4104 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4105 }
4106 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4107 {
4108 /*
4109 * Insert into the extent list.
4110 */
4111 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4112 }
4113 else
4114 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4115 pgmUnlock(pVM);
4116 return u16;
4117}
4118
4119
4120/**
4121 * Clear references to guest physical memory.
4122 *
4123 * @param pPool The pool.
4124 * @param pPage The page.
4125 * @param pPhysPage Pointer to the aPages entry in the ram range.
4126 * @param iPte Shadow PTE index
4127 */
4128void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4129{
4130 PVMCC pVM = pPool->CTX_SUFF(pVM);
4131 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4132 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4133
4134 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4135 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4136 {
4137 pgmLock(pVM);
4138
4139 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4140 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4141 do
4142 {
4143 Assert(iPhysExt < pPool->cMaxPhysExts);
4144
4145 /*
4146 * Look for the shadow page and check if it's all freed.
4147 */
4148 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4149 {
4150 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4151 && paPhysExts[iPhysExt].apte[i] == iPte)
4152 {
4153 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4154 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4155
4156 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4157 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4158 {
4159 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4160 pgmUnlock(pVM);
4161 return;
4162 }
4163
4164 /* we can free the node. */
4165 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4166 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4167 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4168 {
4169 /* lonely node */
4170 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4171 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4172 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4173 }
4174 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4175 {
4176 /* head */
4177 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4178 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4179 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4180 }
4181 else
4182 {
4183 /* in list */
4184 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4185 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4186 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4187 }
4188 iPhysExt = iPhysExtNext;
4189 pgmUnlock(pVM);
4190 return;
4191 }
4192 }
4193
4194 /* next */
4195 iPhysExtPrev = iPhysExt;
4196 iPhysExt = paPhysExts[iPhysExt].iNext;
4197 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4198
4199 pgmUnlock(pVM);
4200 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4201 }
4202 else /* nothing to do */
4203 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4204}
4205
4206/**
4207 * Clear references to guest physical memory.
4208 *
4209 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4210 * physical address is assumed to be correct, so the linear search can be
4211 * skipped and we can assert at an earlier point.
4212 *
4213 * @param pPool The pool.
4214 * @param pPage The page.
4215 * @param HCPhys The host physical address corresponding to the guest page.
4216 * @param GCPhys The guest physical address corresponding to HCPhys.
4217 * @param iPte Shadow PTE index
4218 */
4219static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4220{
4221 /*
4222 * Lookup the page and check if it checks out before derefing it.
4223 */
4224 PVMCC pVM = pPool->CTX_SUFF(pVM);
4225 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4226 if (pPhysPage)
4227 {
4228 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4229#ifdef LOG_ENABLED
4230 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4231 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4232#endif
4233 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4234 {
4235 Assert(pPage->cPresent);
4236 Assert(pPool->cPresent);
4237 pPage->cPresent--;
4238 pPool->cPresent--;
4239 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4240 return;
4241 }
4242
4243 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4244 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4245 }
4246 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4247}
4248
4249
4250/**
4251 * Clear references to guest physical memory.
4252 *
4253 * @param pPool The pool.
4254 * @param pPage The page.
4255 * @param HCPhys The host physical address corresponding to the guest page.
4256 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4257 * @param iPte Shadow pte index
4258 */
4259void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4260{
4261 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4262
4263 /*
4264 * Try the hint first.
4265 */
4266 RTHCPHYS HCPhysHinted;
4267 PVMCC pVM = pPool->CTX_SUFF(pVM);
4268 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4269 if (pPhysPage)
4270 {
4271 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4272 Assert(HCPhysHinted);
4273 if (HCPhysHinted == HCPhys)
4274 {
4275 Assert(pPage->cPresent);
4276 Assert(pPool->cPresent);
4277 pPage->cPresent--;
4278 pPool->cPresent--;
4279 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4280 return;
4281 }
4282 }
4283 else
4284 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
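        /* Poison value; it only shows up in the assertion message below if the linear search fails too. */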
4285
4286 /*
4287 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4288 */
4289 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4290 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4291 while (pRam)
4292 {
4293 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4294 while (iPage-- > 0)
4295 {
4296 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4297 {
4298 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4299 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4300 Assert(pPage->cPresent);
4301 Assert(pPool->cPresent);
4302 pPage->cPresent--;
4303 pPool->cPresent--;
4304 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4305 return;
4306 }
4307 }
4308 pRam = pRam->CTX_SUFF(pNext);
4309 }
4310
4311 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4312}
4313
4314
4315/**
4316 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4317 *
4318 * @param pPool The pool.
4319 * @param pPage The page.
4320 * @param pShwPT The shadow page table (mapping of the page).
4321 * @param pGstPT The guest page table.
4322 */
4323DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4324{
4325 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
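    /* With the A20 gate disabled for this shadow page, bit 20 is masked off the guest physical
       address so the dereference hint matches the A20-wrapped address. */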
4326 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4327 {
4328 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4329 if (pShwPT->a[i].n.u1Present)
4330 {
4331 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4332 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4333 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4334 if (!pPage->cPresent)
4335 break;
4336 }
4337 }
4338}
4339
4340
4341/**
4342 * Clear references to guest physical memory in a PAE / 32-bit page table.
4343 *
4344 * @param pPool The pool.
4345 * @param pPage The page.
4346 * @param pShwPT The shadow page table (mapping of the page).
4347 * @param pGstPT The guest page table (just a half one).
4348 */
4349DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4350{
4351 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4352 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4353 {
4354 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4355 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4356 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4357 {
4358 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4359 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4360 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4361 if (!pPage->cPresent)
4362 break;
4363 }
4364 }
4365}
4366
4367
4368/**
4369 * Clear references to guest physical memory in a PAE / PAE page table.
4370 *
4371 * @param pPool The pool.
4372 * @param pPage The page.
4373 * @param pShwPT The shadow page table (mapping of the page).
4374 * @param pGstPT The guest page table.
4375 */
4376DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4377{
4378 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4379 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4380 {
4381 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4382 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4383 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4384 {
4385            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4386 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4387 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4388 if (!pPage->cPresent)
4389 break;
4390 }
4391 }
4392}
4393
4394
4395/**
4396 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4397 *
4398 * @param pPool The pool.
4399 * @param pPage The page.
4400 * @param pShwPT The shadow page table (mapping of the page).
4401 */
4402DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4403{
4404 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4405 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
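    /* The shadow PT maps the guest 4MB page as 4KB pieces, so the guest physical address simply
       advances by PAGE_SIZE for each entry. */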
4406 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4407 {
4408 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4409 if (pShwPT->a[i].n.u1Present)
4410 {
4411 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4412 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4413 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4414 if (!pPage->cPresent)
4415 break;
4416 }
4417 }
4418}
4419
4420
4421/**
4422 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4423 *
4424 * @param pPool The pool.
4425 * @param pPage The page.
4426 * @param pShwPT The shadow page table (mapping of the page).
4427 */
4428DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4429{
4430 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4431 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4432 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4433 {
4434 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4435 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4436 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4437 {
4438 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4439 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4440 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4441 if (!pPage->cPresent)
4442 break;
4443 }
4444 }
4445}
4446
4447
4448/**
4449 * Clear references to shadowed pages in an EPT page table.
4450 *
4451 * @param pPool The pool.
4452 * @param pPage The page.
4453 * @param pShwPT The shadow page table (mapping of the
4454 * page).
4455 */
4456DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4457{
4458 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4459 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4460 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4461 {
4462 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4463 if (pShwPT->a[i].n.u1Present)
4464 {
4465 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4466 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4467 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4468 if (!pPage->cPresent)
4469 break;
4470 }
4471 }
4472}
4473
4474
4475/**
4476 * Clear references to shadowed pages in a 32-bit page directory.
4477 *
4478 * @param pPool The pool.
4479 * @param pPage The page.
4480 * @param pShwPD The shadow page directory (mapping of the page).
4481 */
4482DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4483{
4484 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4485 {
4486 if ( pShwPD->a[i].n.u1Present
4487 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4488 )
4489 {
4490 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4491 if (pSubPage)
4492 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4493 else
4494 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4495 }
4496 }
4497}
4498
4499
4500/**
4501 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4502 *
4503 * @param pPool The pool.
4504 * @param pPage The page.
4505 * @param pShwPD The shadow page directory (mapping of the page).
4506 */
4507DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4508{
4509 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4510 {
4511 if ( pShwPD->a[i].n.u1Present
4512 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4513 {
4514#ifdef PGM_WITH_LARGE_PAGES
4515 if (pShwPD->a[i].b.u1Size)
4516 {
4517 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4518 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4519 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4520 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4521 i);
4522 }
4523 else
4524#endif
4525 {
4526 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4527 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4528 if (pSubPage)
4529 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4530 else
4531 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4532 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4533 }
4534 }
4535 }
4536}
4537
4538
4539/**
4540 * Clear references to shadowed pages in a PAE page directory pointer table.
4541 *
4542 * @param pPool The pool.
4543 * @param pPage The page.
4544 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4545 */
4546DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4547{
4548 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4549 {
4550 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4551 if ( pShwPDPT->a[i].n.u1Present
4552 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4553 )
4554 {
4555 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4556 if (pSubPage)
4557 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4558 else
4559 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4560 }
4561 }
4562}
4563
4564
4565/**
4566 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4567 *
4568 * @param pPool The pool.
4569 * @param pPage The page.
4570 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4571 */
4572DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4573{
4574 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4575 {
4576 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4577 if (pShwPDPT->a[i].n.u1Present)
4578 {
4579 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4580 if (pSubPage)
4581 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4582 else
4583 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4584 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4585 }
4586 }
4587}
4588
4589
4590/**
4591 * Clear references to shadowed pages in a 64-bit level 4 page table.
4592 *
4593 * @param pPool The pool.
4594 * @param pPage The page.
4595 * @param pShwPML4 The shadow level-4 page table (mapping of the page).
4596 */
4597DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4598{
4599 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4600 {
4601 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4602 if (pShwPML4->a[i].n.u1Present)
4603 {
4604 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4605 if (pSubPage)
4606 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4607 else
4608 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4609 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4610 }
4611 }
4612}
4613
4614
4615/**
4616 * Clear references to shadowed pages in an EPT page directory.
4617 *
4618 * @param pPool The pool.
4619 * @param pPage The page.
4620 * @param pShwPD The shadow page directory (mapping of the page).
4621 */
4622DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4623{
4624 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4625 {
4626 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4627 if (pShwPD->a[i].n.u1Present)
4628 {
4629#ifdef PGM_WITH_LARGE_PAGES
4630 if (pShwPD->a[i].b.u1Size)
4631 {
4632 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4633 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4634 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4635 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4636 i);
4637 }
4638 else
4639#endif
4640 {
4641 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4642 if (pSubPage)
4643 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4644 else
4645 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4646 }
4647 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4648 }
4649 }
4650}
4651
4652
4653/**
4654 * Clear references to shadowed pages in an EPT page directory pointer table.
4655 *
4656 * @param pPool The pool.
4657 * @param pPage The page.
4658 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4659 */
4660DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4661{
4662 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4663 {
4664 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4665 if (pShwPDPT->a[i].n.u1Present)
4666 {
4667 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4668 if (pSubPage)
4669 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4670 else
4671 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4672 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4673 }
4674 }
4675}
4676
4677
4678/**
4679 * Clears all references made by this page.
4680 *
4681 * This includes other shadow pages and GC physical addresses.
4682 *
4683 * @param pPool The pool.
4684 * @param pPage The page.
4685 */
4686static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4687{
4688 /*
4689 * Map the shadow page and take action according to the page kind.
4690 */
4691 PVMCC pVM = pPool->CTX_SUFF(pVM);
4692 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4693 switch (pPage->enmKind)
4694 {
4695 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4696 {
4697 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4698 void *pvGst;
4699 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4700 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4701 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4702 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4703 break;
4704 }
4705
4706 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4707 {
4708 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4709 void *pvGst;
4710 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4711 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4712 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4713 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4714 break;
4715 }
4716
4717 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4718 {
4719 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4720 void *pvGst;
4721 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4722 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4723 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4724 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4725 break;
4726 }
4727
4728 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4729 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4730 {
4731 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4732 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4733 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4734 break;
4735 }
4736
4737 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4738 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4739 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4740 {
4741 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4742 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4743 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4744 break;
4745 }
4746
4747 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4748 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4749 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4750 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4751 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4752 case PGMPOOLKIND_PAE_PD_PHYS:
4753 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4754 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4755 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4756 break;
4757
4758 case PGMPOOLKIND_32BIT_PD_PHYS:
4759 case PGMPOOLKIND_32BIT_PD:
4760 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4761 break;
4762
4763 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4764 case PGMPOOLKIND_PAE_PDPT:
4765 case PGMPOOLKIND_PAE_PDPT_PHYS:
4766 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4767 break;
4768
4769 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4770 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4771 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4772 break;
4773
4774 case PGMPOOLKIND_64BIT_PML4:
4775 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4776 break;
4777
4778 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4779 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4780 break;
4781
4782 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4783 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4784 break;
4785
4786 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4787 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4788 break;
4789
4790 default:
4791 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4792 }
4793
4794    /* Paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4795 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4796 ASMMemZeroPage(pvShw);
4797 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4798 pPage->fZeroed = true;
4799 Assert(!pPage->cPresent);
4800 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4801}
4802
4803
4804/**
4805 * Flushes a pool page.
4806 *
4807 * This moves the page to the free list after removing all user references to it.
4808 *
4809 * @returns VBox status code.
4810 * @retval VINF_SUCCESS on success.
4811 * @param pPool The pool.
4812 * @param pPage The shadow page.
4813 * @param fFlush Flush the TLBS when required (should only be false in very specific use cases!!)
4814 */
4815int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4816{
4817 PVMCC pVM = pPool->CTX_SUFF(pVM);
4818 bool fFlushRequired = false;
4819
4820 int rc = VINF_SUCCESS;
4821 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4822 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4823 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4824
4825 /*
4826 * Reject any attempts at flushing any of the special root pages (shall
4827 * not happen).
4828 */
4829 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4830 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4831 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4832 VINF_SUCCESS);
4833
4834 pgmLock(pVM);
4835
4836 /*
4837 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4838 */
4839 if (pgmPoolIsPageLocked(pPage))
4840 {
4841 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4842 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4843 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4844 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4845 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4846 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4847 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4848 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4849 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4850 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4851 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4852 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4853 pgmUnlock(pVM);
4854 return VINF_SUCCESS;
4855 }
4856
4857#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4858 /* Start a subset so we won't run out of mapping space. */
4859 PVMCPU pVCpu = VMMGetCpu(pVM);
4860 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4861#endif
4862
4863 /*
4864 * Mark the page as being in need of an ASMMemZeroPage().
4865 */
4866 pPage->fZeroed = false;
4867
4868#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4869 if (pPage->fDirty)
4870 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4871#endif
4872
4873 /* If there are any users of this table, then we *must* issue a TLB flush on all VCPUs. */
4874 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4875 fFlushRequired = true;
4876
4877 /*
4878 * Clear the page.
4879 */
4880 pgmPoolTrackClearPageUsers(pPool, pPage);
4881 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4882 pgmPoolTrackDeref(pPool, pPage);
4883 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4884
4885 /*
4886 * Flush it from the cache.
4887 */
4888 pgmPoolCacheFlushPage(pPool, pPage);
4889
4890#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4891 /* Heavy stuff done. */
4892 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4893#endif
4894
4895 /*
4896 * Deregister the monitoring.
4897 */
4898 if (pPage->fMonitored)
4899 rc = pgmPoolMonitorFlush(pPool, pPage);
4900
4901 /*
4902 * Free the page.
4903 */
4904 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
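    /* LIFO push: link the page back onto the head of the free list (chained by pool page index, not pointers). */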
4905 pPage->iNext = pPool->iFreeHead;
4906 pPool->iFreeHead = pPage->idx;
4907 pPage->enmKind = PGMPOOLKIND_FREE;
4908 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4909 pPage->GCPhys = NIL_RTGCPHYS;
4910 pPage->fReusedFlushPending = false;
4911
4912 pPool->cUsedPages--;
4913
4914 /* Flush the TLBs of all VCPUs if required. */
4915 if ( fFlushRequired
4916 && fFlush)
4917 {
4918 PGM_INVL_ALL_VCPU_TLBS(pVM);
4919 }
4920
4921 pgmUnlock(pVM);
4922 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4923 return rc;
4924}
4925
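#if 0 /* Illustrative sketch only, not part of this file: a caller that flushes
         several pages in a row may pass fFlush=false and issue a single combined
         TLB flush afterwards.  The papPages/cPages variables are assumptions. */
    for (unsigned i = 0; i < cPages; i++)
        pgmPoolFlushPage(pPool, papPages[i], false /*fFlush*/);
    PGM_INVL_ALL_VCPU_TLBS(pVM);    /* One flush instead of one per page. */
#endif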
4926
4927/**
4928 * Frees a usage of a pool page.
4929 *
4930 * The caller is responsible for updating the user table so that it no longer
4931 * references the shadow page.
4932 *
4933 * @param pPool The pool.
4934 * @param pPage The shadow page.
4935 * @param iUser The shadow page pool index of the user table.
4936 * NIL_PGMPOOL_IDX for root pages.
4937 * @param iUserTable The index into the user table (shadowed). Ignored if
4938 * root page.
4939 */
4940void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4941{
4942 PVMCC pVM = pPool->CTX_SUFF(pVM);
4943
4944 STAM_PROFILE_START(&pPool->StatFree, a);
4945 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4946 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4947 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
4948
4949 pgmLock(pVM);
4950 if (iUser != NIL_PGMPOOL_IDX)
4951 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4952 if (!pPage->fCached)
4953 pgmPoolFlushPage(pPool, pPage);
4954 pgmUnlock(pVM);
4955 STAM_PROFILE_STOP(&pPool->StatFree, a);
4956}
4957
4958
4959/**
4960 * Makes one or more pages free by growing the pool or freeing a cached page.
4961 *
4962 * @returns VBox status code.
4963 * @retval VINF_SUCCESS on success.
4964 *
4965 * @param pPool The pool.
4966 * @param enmKind Page table kind.
4967 * @param iUser The user of the page.
4968 */
4969static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4970{
4971 PVMCC pVM = pPool->CTX_SUFF(pVM);
4972 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
4973 NOREF(enmKind);
4974
4975 /*
4976 * If the pool isn't fully grown yet, expand it.
4977 */
4978 if (pPool->cCurPages < pPool->cMaxPages)
4979 {
4980 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4981#ifdef IN_RING3
4982 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
4983#else
4984 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4985#endif
4986 if (RT_FAILURE(rc))
4987 return rc;
4988 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4989 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4990 return VINF_SUCCESS;
4991 }
4992
4993 /*
4994 * Free one cached page.
4995 */
4996 return pgmPoolCacheFreeOne(pPool, iUser);
4997}
4998
4999
5000/**
5001 * Allocates a page from the pool.
5002 *
5003 * This page may actually be a cached page and not in need of any processing
5004 * on the callers part.
5005 *
5006 * @returns VBox status code.
5007 * @retval VINF_SUCCESS if a NEW page was allocated.
5008 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5009 *
5010 * @param pVM The cross context VM structure.
5011 * @param GCPhys The GC physical address of the page we're going to shadow.
5012 * For 4MB and 2MB PD entries, it's the first address the
5013 * shadow PT is covering.
5014 * @param enmKind The kind of mapping.
5015 * @param enmAccess Access type for the mapping (only relevant for big pages)
5016 * @param fA20Enabled Whether the A20 gate is enabled or not.
5017 * @param iUser The shadow page pool index of the user table. Root
5018 * pages should pass NIL_PGMPOOL_IDX.
5019 * @param iUserTable The index into the user table (shadowed). Ignored for
5020 * root pages (iUser == NIL_PGMPOOL_IDX).
5021 * @param fLockPage Lock the page
5022 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5023 */
5024int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5025 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5026{
5027 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5028 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5029 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5030 *ppPage = NULL;
5031 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5032 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5033 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5034
5035 pgmLock(pVM);
5036
5037 if (pPool->fCacheEnabled)
5038 {
5039 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5040 if (RT_SUCCESS(rc2))
5041 {
5042 if (fLockPage)
5043 pgmPoolLockPage(pPool, *ppPage);
5044 pgmUnlock(pVM);
5045 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5046 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5047 return rc2;
5048 }
5049 }
5050
5051 /*
5052 * Allocate a new one.
5053 */
5054 int rc = VINF_SUCCESS;
5055 uint16_t iNew = pPool->iFreeHead;
5056 if (iNew == NIL_PGMPOOL_IDX)
5057 {
5058 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5059 if (RT_FAILURE(rc))
5060 {
5061 pgmUnlock(pVM);
5062 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5063 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5064 return rc;
5065 }
5066 iNew = pPool->iFreeHead;
5067 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5068 }
5069
5070 /* unlink the free head */
5071 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5072 pPool->iFreeHead = pPage->iNext;
5073 pPage->iNext = NIL_PGMPOOL_IDX;
5074
5075 /*
5076 * Initialize it.
5077 */
5078 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5079 pPage->enmKind = enmKind;
5080 pPage->enmAccess = enmAccess;
5081 pPage->GCPhys = GCPhys;
5082 pPage->fA20Enabled = fA20Enabled;
5083 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5084 pPage->fMonitored = false;
5085 pPage->fCached = false;
5086 pPage->fDirty = false;
5087 pPage->fReusedFlushPending = false;
5088 pPage->cModifications = 0;
5089 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5090 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5091 pPage->cPresent = 0;
5092 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5093 pPage->idxDirtyEntry = 0;
5094 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5095 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5096 pPage->cLastAccessHandler = 0;
5097 pPage->cLocked = 0;
5098# ifdef VBOX_STRICT
5099 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5100# endif
5101
5102 /*
5103 * Insert into the tracking and cache. If this fails, free the page.
5104 */
5105 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5106 if (RT_FAILURE(rc3))
5107 {
5108 pPool->cUsedPages--;
5109 pPage->enmKind = PGMPOOLKIND_FREE;
5110 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5111 pPage->GCPhys = NIL_RTGCPHYS;
5112 pPage->iNext = pPool->iFreeHead;
5113 pPool->iFreeHead = pPage->idx;
5114 pgmUnlock(pVM);
5115 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5116 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5117 return rc3;
5118 }
5119
5120 /*
5121 * Commit the allocation, clear the page and return.
5122 */
5123#ifdef VBOX_WITH_STATISTICS
5124 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5125 pPool->cUsedPagesHigh = pPool->cUsedPages;
5126#endif
5127
5128 if (!pPage->fZeroed)
5129 {
5130 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5131 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5132 ASMMemZeroPage(pv);
5133 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5134 }
5135
5136 *ppPage = pPage;
5137 if (fLockPage)
5138 pgmPoolLockPage(pPool, pPage);
5139 pgmUnlock(pVM);
5140 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5141 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5142 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5143 return rc;
5144}
5145
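#if 0 /* Illustrative sketch only, not part of this file: roughly how a shadow
         paging caller might allocate a shadow page table for a guest PDE and
         hook it into the parent shadow page directory.  pShwPde, iPdDst, pPdDst,
         GCPhysPt and the PDE flags are assumptions, not the actual shadow
         paging code. */
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAlloc(pVM, GCPhysPt, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
                          true /*fA20Enabled*/, pShwPde->idx, iPdDst, false /*fLockPage*/, &pShwPage);
    if (RT_SUCCESS(rc)) /* VINF_SUCCESS (new page) or VINF_PGM_CACHED_PAGE. */
        pPdDst->a[iPdDst].u = pShwPage->Core.Key | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A;
#endif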
5146
5147/**
5148 * Frees a usage of a pool page.
5149 *
5150 * @param pVM The cross context VM structure.
5151 * @param HCPhys The HC physical address of the shadow page.
5152 * @param iUser The shadow page pool index of the user table.
5153 * NIL_PGMPOOL_IDX if root page.
5154 * @param iUserTable The index into the user table (shadowed). Ignored if
5155 * root page.
5156 */
5157void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5158{
5159 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5160 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5161 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5162}
5163
5164
5165/**
5166 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5167 *
5168 * @returns Pointer to the shadow page structure.
5169 * @param pPool The pool.
5170 * @param HCPhys The HC physical address of the shadow page.
5171 */
5172PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5173{
5174 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5175
5176 /*
5177 * Look up the page.
5178 */
5179 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5180
5181 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5182 return pPage;
5183}
5184
5185
5186/**
5187 * Internal worker for finding a page for debugging purposes, no assertions.
5188 *
5189 * @returns Pointer to the shadow page structure. NULL if not found.
5190 * @param pPool The pool.
5191 * @param HCPhys The HC physical address of the shadow page.
5192 */
5193PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5194{
5195 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5196 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5197}
5198
5199
5200/**
5201 * Internal worker for PGM_HCPHYS_2_PTR.
5202 *
5203 * @returns VBox status code.
5204 * @param pVM The cross context VM structure.
5205 * @param HCPhys The HC physical address of the shadow page.
5206 * @param ppv Where to return the address.
5207 */
5208int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5209{
5210 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5211 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5212 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5213 VERR_PGM_POOL_GET_PAGE_FAILED);
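    /* Combine the ring-context mapping of the pool page with the byte offset within the page. */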
5214 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5215 return VINF_SUCCESS;
5216}
5217
5218#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5219
5220/**
5221 * Flush the specified page if present.
5222 *
5223 * @param pVM The cross context VM structure.
5224 * @param GCPhys Guest physical address of the page to flush
5225 */
5226void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5227{
5228 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5229
5230 VM_ASSERT_EMT(pVM);
5231
5232 /*
5233 * Look up the GCPhys in the hash.
5234 */
5235 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5236 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5237 if (i == NIL_PGMPOOL_IDX)
5238 return;
5239
5240 do
5241 {
5242 PPGMPOOLPAGE pPage = &pPool->aPages[i];
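        /* Unsigned compare: matches only when pPage->GCPhys lies in [GCPhys, GCPhys + PAGE_SIZE), i.e. the same guest page. */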
5243 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5244 {
5245 switch (pPage->enmKind)
5246 {
5247 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5248 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5249 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5250 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5251 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5252 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5253 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5254 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5255 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5256 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5257 case PGMPOOLKIND_64BIT_PML4:
5258 case PGMPOOLKIND_32BIT_PD:
5259 case PGMPOOLKIND_PAE_PDPT:
5260 {
5261 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5262# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5263 if (pPage->fDirty)
5264 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5265 else
5266# endif
5267 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5268 Assert(!pgmPoolIsPageLocked(pPage));
5269 pgmPoolMonitorChainFlush(pPool, pPage);
5270 return;
5271 }
5272
5273 /* ignore, no monitoring. */
5274 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5275 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5276 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5277 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5278 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5279 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5280 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5281 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5282 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5283 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5284 case PGMPOOLKIND_ROOT_NESTED:
5285 case PGMPOOLKIND_PAE_PD_PHYS:
5286 case PGMPOOLKIND_PAE_PDPT_PHYS:
5287 case PGMPOOLKIND_32BIT_PD_PHYS:
5288 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5289 break;
5290
5291 default:
5292 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5293 }
5294 }
5295
5296 /* next */
5297 i = pPage->iNext;
5298 } while (i != NIL_PGMPOOL_IDX);
5299 return;
5300}
5301
5302
5303/**
5304 * Reset CPU on hot plugging.
5305 *
5306 * @param pVM The cross context VM structure.
5307 * @param pVCpu The cross context virtual CPU structure.
5308 */
5309void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5310{
5311 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5312
5313 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5314 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5315 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5316}
5317
5318
5319/**
5320 * Flushes the entire cache.
5321 *
5322 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5323 * this and will execute the CR3 flush.
5324 *
5325 * @param pVM The cross context VM structure.
5326 */
5327void pgmR3PoolReset(PVM pVM)
5328{
5329 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5330
5331 PGM_LOCK_ASSERT_OWNER(pVM);
5332 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5333 LogFlow(("pgmR3PoolReset:\n"));
5334
5335 /*
5336 * If there are no pages in the pool, there is nothing to do.
5337 */
5338 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5339 {
5340 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5341 return;
5342 }
5343
5344 /*
5345 * Exit the shadow mode since we're going to clear everything,
5346 * including the root page.
5347 */
5348 VMCC_FOR_EACH_VMCPU(pVM)
5349 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5350 VMCC_FOR_EACH_VMCPU_END(pVM);
5351
5352
5353 /*
5354 * Nuke the free list and reinsert all pages into it.
5355 */
5356 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5357 {
5358 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5359
5360 if (pPage->fMonitored)
5361 pgmPoolMonitorFlush(pPool, pPage);
5362 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5363 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5364 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5365 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5366 pPage->GCPhys = NIL_RTGCPHYS;
5367 pPage->enmKind = PGMPOOLKIND_FREE;
5368 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5369 Assert(pPage->idx == i);
5370 pPage->iNext = i + 1;
5371 pPage->fA20Enabled = true;
5372 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5373 pPage->fSeenNonGlobal = false;
5374 pPage->fMonitored = false;
5375 pPage->fDirty = false;
5376 pPage->fCached = false;
5377 pPage->fReusedFlushPending = false;
5378 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5379 pPage->cPresent = 0;
5380 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5381 pPage->cModifications = 0;
5382 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5383 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5384 pPage->idxDirtyEntry = 0;
5385 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5386 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5387 pPage->cLastAccessHandler = 0;
5388 pPage->cLocked = 0;
5389# ifdef VBOX_STRICT
5390 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5391# endif
5392 }
5393 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5394 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5395 pPool->cUsedPages = 0;
5396
5397 /*
5398 * Zap and reinitialize the user records.
5399 */
5400 pPool->cPresent = 0;
5401 pPool->iUserFreeHead = 0;
5402 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5403 const unsigned cMaxUsers = pPool->cMaxUsers;
5404 for (unsigned i = 0; i < cMaxUsers; i++)
5405 {
5406 paUsers[i].iNext = i + 1;
5407 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5408 paUsers[i].iUserTable = 0xfffffffe;
5409 }
5410 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5411
5412 /*
5413 * Clear all the GCPhys links and rebuild the phys ext free list.
5414 */
5415 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5416 pRam;
5417 pRam = pRam->CTX_SUFF(pNext))
5418 {
5419 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5420 while (iPage-- > 0)
5421 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5422 }
5423
5424 pPool->iPhysExtFreeHead = 0;
5425 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5426 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
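    /* Chain every extent into one free list and mark all three tracking slots in each extent as unused. */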
5427 for (unsigned i = 0; i < cMaxPhysExts; i++)
5428 {
5429 paPhysExts[i].iNext = i + 1;
5430 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5431 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5432 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5433 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5434 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5435 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5436 }
5437 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5438
5439 /*
5440 * Just zap the modified list.
5441 */
5442 pPool->cModifiedPages = 0;
5443 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5444
5445 /*
5446 * Clear the GCPhys hash and the age list.
5447 */
5448 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5449 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5450 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5451 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5452
5453# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5454 /* Clear all dirty pages. */
5455 pPool->idxFreeDirtyPage = 0;
5456 pPool->cDirtyPages = 0;
5457 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5458 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5459# endif
5460
5461 /*
5462 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5463 */
5464 VMCC_FOR_EACH_VMCPU(pVM)
5465 {
5466 /*
5467 * Re-enter the shadowing mode and assert Sync CR3 FF.
5468 */
5469 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5470 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5471 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5472 }
5473 VMCC_FOR_EACH_VMCPU_END(pVM);
5474
5475 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5476}
5477
5478#endif /* IN_RING3 */
5479
5480#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5481/**
5482 * Stringifies a PGMPOOLKIND value.
5483 */
5484static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5485{
5486 switch ((PGMPOOLKIND)enmKind)
5487 {
5488 case PGMPOOLKIND_INVALID:
5489 return "PGMPOOLKIND_INVALID";
5490 case PGMPOOLKIND_FREE:
5491 return "PGMPOOLKIND_FREE";
5492 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5493 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5494 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5495 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5496 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5497 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5498 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5499 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5500 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5501 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5502 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5503 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5504 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5505 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5506 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5507 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5508 case PGMPOOLKIND_32BIT_PD:
5509 return "PGMPOOLKIND_32BIT_PD";
5510 case PGMPOOLKIND_32BIT_PD_PHYS:
5511 return "PGMPOOLKIND_32BIT_PD_PHYS";
5512 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5513 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5514 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5515 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5516 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5517 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5518 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5519 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5520 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5521 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5522 case PGMPOOLKIND_PAE_PD_PHYS:
5523 return "PGMPOOLKIND_PAE_PD_PHYS";
5524 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5525 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5526 case PGMPOOLKIND_PAE_PDPT:
5527 return "PGMPOOLKIND_PAE_PDPT";
5528 case PGMPOOLKIND_PAE_PDPT_PHYS:
5529 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5530 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5531 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5532 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5533 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5534 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5535 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5536 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5537 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5538 case PGMPOOLKIND_64BIT_PML4:
5539 return "PGMPOOLKIND_64BIT_PML4";
5540 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5541 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5542 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5543 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5544 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5545 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5546 case PGMPOOLKIND_ROOT_NESTED:
5547 return "PGMPOOLKIND_ROOT_NESTED";
5548 }
5549 return "Unknown kind!";
5550}
5551#endif /* LOG_ENABLED || VBOX_STRICT */
5552