VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 80268

Last change on this file since 80268 was 80268, checked in by vboxsync, 6 years ago

VMM: Refactoring VMMAll/* to use VMCC & VMMCPUCC. bugref:9217

1/* $Id: PGMAllPool.cpp 80268 2019-08-14 11:25:13Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define VBOX_BUGREF_9217_PART_I
23#define LOG_GROUP LOG_GROUP_PGM_POOL
24#include <VBox/vmm/pgm.h>
25#include <VBox/vmm/mm.h>
26#include <VBox/vmm/em.h>
27#include <VBox/vmm/cpum.h>
28#include "PGMInternal.h"
29#include <VBox/vmm/vmcc.h>
30#include "PGMInline.h"
31#include <VBox/disopcode.h>
32#include <VBox/vmm/hm_vmx.h>
33
34#include <VBox/log.h>
35#include <VBox/err.h>
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/string.h>
39
40
41/*********************************************************************************************************************************
42* Internal Functions *
43*********************************************************************************************************************************/
44RT_C_DECLS_BEGIN
45#if 0 /* unused */
46DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
47DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
48#endif /* unused */
49static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
54static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
55#endif
56#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
57static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
58#endif
59
60int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
61PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
62void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
63void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
64
65RT_C_DECLS_END
66
67
68#if 0 /* unused */
69/**
70 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
71 *
72 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
73 * @param enmKind The page kind.
74 */
75DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
76{
77 switch (enmKind)
78 {
79 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
82 return true;
83 default:
84 return false;
85 }
86}
87#endif /* unused */
88
89
90/**
91 * Flushes a chain of pages sharing the same access monitor.
92 *
93 * @param pPool The pool.
94 * @param pPage A page in the chain.
95 */
96void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
97{
98 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
99
100 /*
101 * Find the list head.
102 */
103 uint16_t idx = pPage->idx;
104 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
105 {
106 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 idx = pPage->iMonitoredPrev;
109 Assert(idx != pPage->idx);
110 pPage = &pPool->aPages[idx];
111 }
112 }
113
114 /*
115 * Iterate the list flushing each shadow page.
116 */
117 for (;;)
118 {
119 idx = pPage->iMonitoredNext;
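        /* iMonitoredNext was sampled above because pgmPoolFlushPage unlinks the page from the monitored chain. */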
120 Assert(idx != pPage->idx);
121 if (pPage->idx >= PGMPOOL_IDX_FIRST)
122 {
123 int rc2 = pgmPoolFlushPage(pPool, pPage);
124 AssertRC(rc2);
125 }
126 /* next */
127 if (idx == NIL_PGMPOOL_IDX)
128 break;
129 pPage = &pPool->aPages[idx];
130 }
131}
132
133
134/**
135 * Reads the guest entry being modified, either from the current context mapping (ring-3) or from guest physical memory.
136 *
137 * @returns VBox status code suitable for scheduling.
138 * @param pVM The cross context VM structure.
139 * @param pvDst Destination address
140 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
141 * on the context (e.g. \#PF in R0 & RC).
142 * @param GCPhysSrc The source guest physical address.
143 * @param cb Size of data to read
144 */
145DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
146{
147#if defined(IN_RING3)
148 NOREF(pVM); NOREF(GCPhysSrc);
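    /* pvSrc is a valid mapping in ring-3; align it down to the entry size (cb) so the whole entry is read even when the write hit the middle of it. */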
149 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
150 return VINF_SUCCESS;
151#else
152 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
153 NOREF(pvSrc);
154 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
155#endif
156}
157
158
159/**
160 * Process shadow entries before they are changed by the guest.
161 *
162 * For PT entries we will clear them. For PD entries, we'll simply check
163 * for mapping conflicts and set the SyncCR3 FF if found.
164 *
165 * @param pVCpu The cross context virtual CPU structure.
166 * @param pPool The pool.
167 * @param pPage The head page.
168 * @param GCPhysFault The guest physical fault address.
169 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
170 * depending on the context (e.g. \#PF in R0 & RC).
171 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
172 */
173static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
174 void const *pvAddress, unsigned cbWrite)
175{
176 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
177 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
178 PVMCC pVM = pPool->CTX_SUFF(pVM);
179 NOREF(pVCpu);
180
181 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
182 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
183
184 for (;;)
185 {
186 union
187 {
188 void *pv;
189 PX86PT pPT;
190 PPGMSHWPTPAE pPTPae;
191 PX86PD pPD;
192 PX86PDPAE pPDPae;
193 PX86PDPT pPDPT;
194 PX86PML4 pPML4;
195 } uShw;
196
197 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
198 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
199
200 uShw.pv = NULL;
201 switch (pPage->enmKind)
202 {
203 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
204 {
205 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
206 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
207 const unsigned iShw = off / sizeof(X86PTE);
208 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
209 if (uShw.pPT->a[iShw].n.u1Present)
210 {
211 X86PTE GstPte;
212
213 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
214 AssertRC(rc);
215 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
216 pgmPoolTracDerefGCPhysHint(pPool, pPage,
217 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
218 GstPte.u & X86_PTE_PG_MASK,
219 iShw);
220 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
221 }
222 break;
223 }
224
225 /* page/2 sized */
226 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
227 {
228 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
229 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
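            /* A PAE shadow PT covers only half of the 4 MB mapped by the 32-bit guest PT; only react if the write lands in the 2K half of the guest PT that this shadow page represents. */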
230 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
231 {
232 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
233 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
234 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
235 {
236 X86PTE GstPte;
237 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
238 AssertRC(rc);
239
240 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
241 pgmPoolTracDerefGCPhysHint(pPool, pPage,
242 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
243 GstPte.u & X86_PTE_PG_MASK,
244 iShw);
245 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
246 }
247 }
248 break;
249 }
250
251 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
252 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
253 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
254 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
255 {
256 unsigned iGst = off / sizeof(X86PDE);
257 unsigned iShwPdpt = iGst / 256;
258 unsigned iShw = (iGst % 256) * 2;
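            /* A 1024-entry 32-bit guest PD is shadowed by four 512-entry PAE PDs: iShwPdpt selects the quarter this shadow page covers, and each guest PDE (4 MB) corresponds to two PAE PDEs (2 MB each), hence the factor of two. */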
259 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
260
261 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
262 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
263 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
264 {
265 for (unsigned i = 0; i < 2; i++)
266 {
267 if (uShw.pPDPae->a[iShw+i].n.u1Present)
268 {
269 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
270 pgmPoolFree(pVM,
271 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
272 pPage->idx,
273 iShw + i);
274 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
275 }
276
277 /* paranoia / a bit assumptive. */
278 if ( (off & 3)
279 && (off & 3) + cbWrite > 4)
280 {
281 const unsigned iShw2 = iShw + 2 + i;
282 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
283 {
284 if (uShw.pPDPae->a[iShw2].n.u1Present)
285 {
286 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
287 pgmPoolFree(pVM,
288 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
289 pPage->idx,
290 iShw2);
291 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
292 }
293 }
294 }
295 }
296 }
297 break;
298 }
299
300 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
301 {
302 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
303 const unsigned iShw = off / sizeof(X86PTEPAE);
304 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
305 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
306 {
307 X86PTEPAE GstPte;
308 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
309 AssertRC(rc);
310
311 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
312 pgmPoolTracDerefGCPhysHint(pPool, pPage,
313 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
314 GstPte.u & X86_PTE_PAE_PG_MASK,
315 iShw);
316 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
317 }
318
319 /* paranoia / a bit assumptive. */
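            /* If the write is misaligned and spills beyond this 8-byte entry, also deal with the entry that receives the final byte. */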
320 if ( (off & 7)
321 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
322 {
323 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
324 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
325
326 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
327 {
328 X86PTEPAE GstPte;
329 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
330 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
331 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
332 AssertRC(rc);
333 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
334 pgmPoolTracDerefGCPhysHint(pPool, pPage,
335 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
336 GstPte.u & X86_PTE_PAE_PG_MASK,
337 iShw2);
338 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
339 }
340 }
341 break;
342 }
343
344 case PGMPOOLKIND_32BIT_PD:
345 {
346 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
347 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
348
349 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
350 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
351 if (uShw.pPD->a[iShw].n.u1Present)
352 {
353 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
354 pgmPoolFree(pVM,
355 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
356 pPage->idx,
357 iShw);
358 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
359 }
360 /* paranoia / a bit assumptive. */
361 if ( (off & 3)
362 && (off & 3) + cbWrite > sizeof(X86PTE))
363 {
364 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
365 if ( iShw2 != iShw
366 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
367 {
368 if (uShw.pPD->a[iShw2].n.u1Present)
369 {
370 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
371 pgmPoolFree(pVM,
372 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
373 pPage->idx,
374 iShw2);
375 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
376 }
377 }
378 }
379#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
380 if ( uShw.pPD->a[iShw].n.u1Present
381 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
382 {
383 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
384 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
385 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
386 }
387#endif
388 break;
389 }
390
391 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
392 {
393 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
394 const unsigned iShw = off / sizeof(X86PDEPAE);
395 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
396
397 /*
398 * Causes trouble when the guest uses a PDE to refer to the whole page table level
399 * structure. (Invalidate here; faults later on when it tries to change the page
400 * table entries -> recheck; probably only applies to the RC case.)
401 */
402 if (uShw.pPDPae->a[iShw].n.u1Present)
403 {
404 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
405 pgmPoolFree(pVM,
406 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
407 pPage->idx,
408 iShw);
409 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
410 }
411
412 /* paranoia / a bit assumptive. */
413 if ( (off & 7)
414 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
415 {
416 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
417 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
418
419 if (uShw.pPDPae->a[iShw2].n.u1Present)
420 {
421 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
422 pgmPoolFree(pVM,
423 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
424 pPage->idx,
425 iShw2);
426 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
427 }
428 }
429 break;
430 }
431
432 case PGMPOOLKIND_PAE_PDPT:
433 {
434 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
435 /*
436 * Hopefully this doesn't happen very often:
437 * - touching unused parts of the page
438 * - messing with the bits of pd pointers without changing the physical address
439 */
440 /* PDPT roots are not page aligned; 32 byte only! */
441 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
442
443 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
444 const unsigned iShw = offPdpt / sizeof(X86PDPE);
445 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
446 {
447 if (uShw.pPDPT->a[iShw].n.u1Present)
448 {
449 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
450 pgmPoolFree(pVM,
451 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
452 pPage->idx,
453 iShw);
454 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
455 }
456
457 /* paranoia / a bit assumptive. */
458 if ( (offPdpt & 7)
459 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
460 {
461 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
462 if ( iShw2 != iShw
463 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
464 {
465 if (uShw.pPDPT->a[iShw2].n.u1Present)
466 {
467 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
468 pgmPoolFree(pVM,
469 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
470 pPage->idx,
471 iShw2);
472 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
473 }
474 }
475 }
476 }
477 break;
478 }
479
480 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
481 {
482 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
483 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
484 const unsigned iShw = off / sizeof(X86PDEPAE);
485 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
486 if (uShw.pPDPae->a[iShw].n.u1Present)
487 {
488 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
489 pgmPoolFree(pVM,
490 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
491 pPage->idx,
492 iShw);
493 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
494 }
495 /* paranoia / a bit assumptive. */
496 if ( (off & 7)
497 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
498 {
499 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
500 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
501
502 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
503 if (uShw.pPDPae->a[iShw2].n.u1Present)
504 {
505 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
506 pgmPoolFree(pVM,
507 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
508 pPage->idx,
509 iShw2);
510 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
511 }
512 }
513 break;
514 }
515
516 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
517 {
518 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
519 /*
520 * Hopefully this doesn't happen very often:
521 * - messing with the bits of pd pointers without changing the physical address
522 */
523 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
524 const unsigned iShw = off / sizeof(X86PDPE);
525 if (uShw.pPDPT->a[iShw].n.u1Present)
526 {
527 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
528 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
529 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
530 }
531 /* paranoia / a bit assumptive. */
532 if ( (off & 7)
533 && (off & 7) + cbWrite > sizeof(X86PDPE))
534 {
535 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
536 if (uShw.pPDPT->a[iShw2].n.u1Present)
537 {
538 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
539 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
540 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
541 }
542 }
543 break;
544 }
545
546 case PGMPOOLKIND_64BIT_PML4:
547 {
548 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
549 /*
550 * Hopefully this doesn't happen very often:
551 * - messing with the bits of pd pointers without changing the physical address
552 */
553 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
554 const unsigned iShw = off / sizeof(X86PDPE);
555 if (uShw.pPML4->a[iShw].n.u1Present)
556 {
557 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
558 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
559 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
560 }
561 /* paranoia / a bit assumptive. */
562 if ( (off & 7)
563 && (off & 7) + cbWrite > sizeof(X86PDPE))
564 {
565 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
566 if (uShw.pPML4->a[iShw2].n.u1Present)
567 {
568 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
569 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
570 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
571 }
572 }
573 break;
574 }
575
576 default:
577 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
578 }
579 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
580
581 /* next */
582 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
583 return;
584 pPage = &pPool->aPages[pPage->iMonitoredNext];
585 }
586}
587
588#ifndef IN_RING3
589
590/**
591 * Checks if an access could be a fork operation in progress.
592 *
593 * Meaning that the guest is setting up the parent process for Copy-On-Write.
594 *
595 * @returns true if it's likely that we're forking, otherwise false.
596 * @param pPool The pool.
597 * @param pDis The disassembled instruction.
598 * @param offFault The access offset.
599 */
600DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
601{
602 /*
603 * i386 Linux uses btr to clear X86_PTE_RW.
604 * The functions involved are (2.6.16 source inspection):
605 * clear_bit
606 * ptep_set_wrprotect
607 * copy_one_pte
608 * copy_pte_range
609 * copy_pmd_range
610 * copy_pud_range
611 * copy_page_range
612 * dup_mmap
613 * dup_mm
614 * copy_mm
615 * copy_process
616 * do_fork
617 */
618 if ( pDis->pCurInstr->uOpcode == OP_BTR
619 && !(offFault & 4)
620 /** @todo Validate that the bit index is X86_PTE_RW. */
621 )
622 {
623 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
624 return true;
625 }
626 return false;
627}
628
629
630/**
631 * Determine whether the page is likely to have been reused.
632 *
633 * @returns true if we consider the page as being reused for a different purpose.
634 * @returns false if we consider it to still be a paging page.
635 * @param pVM The cross context VM structure.
636 * @param pVCpu The cross context virtual CPU structure.
637 * @param pRegFrame Trap register frame.
638 * @param pDis The disassembly info for the faulting instruction.
639 * @param pvFault The fault address.
640 * @param pPage The pool page being accessed.
641 *
642 * @remark The REP prefix check is left to the caller because of STOSD/W.
643 */
644DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
645 PPGMPOOLPAGE pPage)
646{
647 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
648 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
649 if (pPage->cLocked)
650 {
651 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused, because it's locked!\n", pvFault, pPage));
652 return false;
653 }
654
655 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
656 if ( HMHasPendingIrq(pVM)
657 && pRegFrame->rsp - pvFault < 32)
658 {
659 /* Fault caused by stack writes while trying to inject an interrupt event. */
660 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
661 return true;
662 }
663
664 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
665
666 /* Non-supervisor mode write means it's used for something else. */
667 if (CPUMGetGuestCPL(pVCpu) == 3)
668 return true;
669
670 switch (pDis->pCurInstr->uOpcode)
671 {
672 /* call implies the actual push of the return address faulted */
673 case OP_CALL:
674 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
675 return true;
676 case OP_PUSH:
677 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
678 return true;
679 case OP_PUSHF:
680 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
681 return true;
682 case OP_PUSHA:
683 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
684 return true;
685 case OP_FXSAVE:
686 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
687 return true;
688 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
689 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
690 return true;
691 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
692 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
693 return true;
694 case OP_MOVSWD:
695 case OP_STOSWD:
696 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
697 && pRegFrame->rcx >= 0x40
698 )
699 {
700 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
701
702 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
703 return true;
704 }
705 break;
706
707 default:
708 /*
709 * Anything having ESP on the left side means stack writes.
710 */
711 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
712 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
713 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
714 {
715 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
716 return true;
717 }
718 break;
719 }
720
721 /*
722 * Page table updates are very unlikely to cross page boundaries,
723 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
724 */
725 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
726 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
727 {
728 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
729 return true;
730 }
731
732 /*
733 * Nobody does an unaligned 8-byte write to a page table, right?
734 */
735 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
736 {
737 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
738 return true;
739 }
740
741 return false;
742}
743
744
745/**
746 * Flushes the page being accessed.
747 *
748 * @returns VBox status code suitable for scheduling.
749 * @param pVM The cross context VM structure.
750 * @param pVCpu The cross context virtual CPU structure.
751 * @param pPool The pool.
752 * @param pPage The pool page (head).
753 * @param pDis The disassembly of the write instruction.
754 * @param pRegFrame The trap register frame.
755 * @param GCPhysFault The fault address as guest physical address.
756 * @param pvFault The fault address.
757 * @todo VBOXSTRICTRC
758 */
759static int pgmRZPoolAccessPfHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
760 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
761{
762 NOREF(pVM); NOREF(GCPhysFault);
763
764 /*
765 * First, do the flushing.
766 */
767 pgmPoolMonitorChainFlush(pPool, pPage);
768
769 /*
770 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
771 * Must do this in raw mode (!); XP boot will fail otherwise.
772 */
773 int rc = VINF_SUCCESS;
774 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
775 if (rc2 == VINF_SUCCESS)
776 { /* do nothing */ }
777 else if (rc2 == VINF_EM_RESCHEDULE)
778 {
779 rc = VBOXSTRICTRC_VAL(rc2);
780# ifndef IN_RING3
781 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
782# endif
783 }
784 else if (rc2 == VERR_EM_INTERPRETER)
785 {
786 rc = VINF_EM_RAW_EMULATE_INSTR;
787 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
788 }
789 else if (RT_FAILURE_NP(rc2))
790 rc = VBOXSTRICTRC_VAL(rc2);
791 else
792 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
793
794 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
795 return rc;
796}
797
798
799/**
800 * Handles the STOSD write accesses.
801 *
802 * @returns VBox status code suitable for scheduling.
803 * @param pVM The cross context VM structure.
804 * @param pPool The pool.
805 * @param pPage The pool page (head).
806 * @param pDis The disassembly of the write instruction.
807 * @param pRegFrame The trap register frame.
808 * @param GCPhysFault The fault address as guest physical address.
809 * @param pvFault The fault address.
810 */
811DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
812 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
813{
814 unsigned uIncrement = pDis->Param1.cb;
815 NOREF(pVM);
816
817 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
818 Assert(pRegFrame->rcx <= 0x20);
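    /* The caller (pgmRZPoolAccessPfHandler) only forwards REP STOS when the count is at most 0x20 and the stores stay within the faulting page, so the loop below is bounded. */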
819
820# ifdef VBOX_STRICT
821 if (pDis->uOpMode == DISCPUMODE_32BIT)
822 Assert(uIncrement == 4);
823 else
824 Assert(uIncrement == 8);
825# endif
826
827 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
828
829 /*
830 * Increment the modification counter and insert it into the list
831 * of modified pages the first time.
832 */
833 if (!pPage->cModifications++)
834 pgmPoolMonitorModifiedInsert(pPool, pPage);
835
836 /*
837 * Execute REP STOSD.
838 *
839 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
840 * write situation, meaning that it's safe to write here.
841 */
842 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
843 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
844 while (pRegFrame->rcx)
845 {
846# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
847 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
848 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
849 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
850# else
851 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
852# endif
853 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
854 pu32 += uIncrement;
855 GCPhysFault += uIncrement;
856 pRegFrame->rdi += uIncrement;
857 pRegFrame->rcx--;
858 }
859 pRegFrame->rip += pDis->cbInstr;
860
861 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
862 return VINF_SUCCESS;
863}
864
865
866/**
867 * Handles the simple write accesses.
868 *
869 * @returns VBox status code suitable for scheduling.
870 * @param pVM The cross context VM structure.
871 * @param pVCpu The cross context virtual CPU structure.
872 * @param pPool The pool.
873 * @param pPage The pool page (head).
874 * @param pDis The disassembly of the write instruction.
875 * @param pRegFrame The trap register frame.
876 * @param GCPhysFault The fault address as guest physical address.
877 * @param pvFault The fault address.
878 * @param pfReused Reused state (in/out)
879 */
880DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
881 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
882{
883 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
884 NOREF(pVM);
885 NOREF(pfReused); /* initialized by caller */
886
887 /*
888 * Increment the modification counter and insert it into the list
889 * of modified pages the first time.
890 */
891 if (!pPage->cModifications++)
892 pgmPoolMonitorModifiedInsert(pPool, pPage);
893
894 /*
895 * Clear all the pages. ASSUMES that pvFault is readable.
896 */
897# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
898 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
899# endif
900
901 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
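    /* Feed the write to the monitor chain in chunks of at most 8 bytes, so each call covers no more than one shadow entry (plus a possible straddle into the next one). */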
902 if (cbWrite <= 8)
903 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
904 else if (cbWrite <= 16)
905 {
906 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
907 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
908 }
909 else
910 {
911 Assert(cbWrite <= 32);
912 for (uint32_t off = 0; off < cbWrite; off += 8)
913 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
914 }
915
916# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
917 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
918# endif
919
920 /*
921 * Interpret the instruction.
922 */
923 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
924 if (RT_SUCCESS(rc))
925 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
926 else if (rc == VERR_EM_INTERPRETER)
927 {
928 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
929 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
930 rc = VINF_EM_RAW_EMULATE_INSTR;
931 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
932 }
933
934# if 0 /* experimental code */
935 if (rc == VINF_SUCCESS)
936 {
937 switch (pPage->enmKind)
938 {
939 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
940 {
941 X86PTEPAE GstPte;
942 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
943 AssertRC(rc);
944
945 /* Check the new value written by the guest. If present and with a bogus physical address, then
946 * it's fairly safe to assume the guest is reusing the PT.
947 */
948 if (GstPte.n.u1Present)
949 {
950 RTHCPHYS HCPhys = -1;
951 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
952 if (rc != VINF_SUCCESS)
953 {
954 *pfReused = true;
955 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
956 }
957 }
958 break;
959 }
960 }
961 }
962# endif
963
964 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
965 return VBOXSTRICTRC_VAL(rc);
966}
967
968
969/**
970 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
971 * \#PF access handler callback for page table pages.}
972 *
973 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
974 */
975DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
976 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
977{
978 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
979 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
980 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
981 unsigned cMaxModifications;
982 bool fForcedFlush = false;
983 NOREF(uErrorCode);
984
985 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
986
987 pgmLock(pVM);
988 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
989 {
990 /* Pool page changed while we were waiting for the lock; ignore. */
991 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
992 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
993 pgmUnlock(pVM);
994 return VINF_SUCCESS;
995 }
996# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
997 if (pPage->fDirty)
998 {
999 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1000 pgmUnlock(pVM);
1001 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1002 }
1003# endif
1004
1005# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1006 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1007 {
1008 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1009 void *pvGst;
1010 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1011 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1012 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1013 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1014 }
1015# endif
1016
1017 /*
1018 * Disassemble the faulting instruction.
1019 */
1020 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1021 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1022 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1023 {
1024 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1025 pgmUnlock(pVM);
1026 return rc;
1027 }
1028
1029 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1030
1031 /*
1032 * We should ALWAYS have the list head as user parameter. This
1033 * is because we use that page to record the changes.
1034 */
1035 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1036
1037# ifdef IN_RING0
1038 /* The maximum number of modifications depends on the page type. */
1039 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1040 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1041 cMaxModifications = 4;
1042 else
1043 cMaxModifications = 24;
1044# else
1045 cMaxModifications = 48;
1046# endif
1047
1048 /*
1049 * Incremental page table updates should weigh more than random ones.
1050 * (Only applies when started from offset 0)
1051 */
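    /* Consecutive writes from (nearly) the same RIP to the immediately following entry double the modification count, so a sequential rewrite of a table quickly hits cMaxModifications and forces a flush/reinit. */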
1052 pVCpu->pgm.s.cPoolAccessHandler++;
1053 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1054 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1055 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1056 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1057 {
1058 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1059 Assert(pPage->cModifications < 32000);
1060 pPage->cModifications = pPage->cModifications * 2;
1061 pPage->GCPtrLastAccessHandlerFault = pvFault;
1062 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1063 if (pPage->cModifications >= cMaxModifications)
1064 {
1065 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1066 fForcedFlush = true;
1067 }
1068 }
1069
1070 if (pPage->cModifications >= cMaxModifications)
1071 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1072
1073 /*
1074 * Check if it's worth dealing with.
1075 */
1076 bool fReused = false;
1077 bool fNotReusedNotForking = false;
1078 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1079 || pgmPoolIsPageLocked(pPage)
1080 )
1081 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1082 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1083 {
1084 /*
1085 * Simple instructions, no REP prefix.
1086 */
1087 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1088 {
1089 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1090 if (fReused)
1091 goto flushPage;
1092
1093 /* A mov instruction to change the first page table entry will be remembered so we can detect
1094 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1095 */
1096 if ( rc == VINF_SUCCESS
1097 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1098 && pDis->pCurInstr->uOpcode == OP_MOV
1099 && (pvFault & PAGE_OFFSET_MASK) == 0)
1100 {
1101 pPage->GCPtrLastAccessHandlerFault = pvFault;
1102 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1103 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1104 /* Make sure we don't kick out a page too quickly. */
1105 if (pPage->cModifications > 8)
1106 pPage->cModifications = 2;
1107 }
1108 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1109 {
1110 /* ignore the 2nd write to this page table entry. */
1111 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1112 }
1113 else
1114 {
1115 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1116 pPage->GCPtrLastAccessHandlerRip = 0;
1117 }
1118
1119 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1120 pgmUnlock(pVM);
1121 return rc;
1122 }
1123
1124 /*
1125 * Windows frequently does small memset() operations (netio test 4k+).
1126 * We have to deal with these or we'll kill the cache and performance.
1127 */
1128 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1129 && !pRegFrame->eflags.Bits.u1DF
1130 && pDis->uOpMode == pDis->uCpuMode
1131 && pDis->uAddrMode == pDis->uCpuMode)
1132 {
1133 bool fValidStosd = false;
1134
1135 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1136 && pDis->fPrefix == DISPREFIX_REP
1137 && pRegFrame->ecx <= 0x20
1138 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1139 && !((uintptr_t)pvFault & 3)
1140 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1141 )
1142 {
1143 fValidStosd = true;
1144 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1145 }
1146 else
1147 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1148 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1149 && pRegFrame->rcx <= 0x20
1150 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1151 && !((uintptr_t)pvFault & 7)
1152 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1153 )
1154 {
1155 fValidStosd = true;
1156 }
1157
1158 if (fValidStosd)
1159 {
1160 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1161 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1162 pgmUnlock(pVM);
1163 return rc;
1164 }
1165 }
1166
1167 /* REP prefix, don't bother. */
1168 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1169 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1170 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1171 fNotReusedNotForking = true;
1172 }
1173
1174# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1175 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1176 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1177 */
1178 if ( pPage->cModifications >= cMaxModifications
1179 && !fForcedFlush
1180 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1181 && ( fNotReusedNotForking
1182 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1183 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1184 )
1185 )
1186 {
1187 Assert(!pgmPoolIsPageLocked(pPage));
1188 Assert(pPage->fDirty == false);
1189
1190 /* Flush any monitored duplicates as we will disable write protection. */
1191 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1192 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1193 {
1194 PPGMPOOLPAGE pPageHead = pPage;
1195
1196 /* Find the monitor head. */
1197 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1198 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1199
1200 while (pPageHead)
1201 {
1202 unsigned idxNext = pPageHead->iMonitoredNext;
1203
1204 if (pPageHead != pPage)
1205 {
1206 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1207 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1208 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1209 AssertRC(rc2);
1210 }
1211
1212 if (idxNext == NIL_PGMPOOL_IDX)
1213 break;
1214
1215 pPageHead = &pPool->aPages[idxNext];
1216 }
1217 }
1218
1219 /* The flushing above might fail for locked pages, so double check. */
1220 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1221 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1222 {
1223 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1224
1225 /* Temporarily allow write access to the page table again. */
1226 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1227 if (rc == VINF_SUCCESS)
1228 {
1229 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1230 AssertMsg(rc == VINF_SUCCESS
1231 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1232 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1233 || rc == VERR_PAGE_NOT_PRESENT,
1234 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1235# ifdef VBOX_STRICT
1236 pPage->GCPtrDirtyFault = pvFault;
1237# endif
1238
1239 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1240 pgmUnlock(pVM);
1241 return rc;
1242 }
1243 }
1244 }
1245# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1246
1247 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1248flushPage:
1249 /*
1250 * Not worth it, so flush it.
1251 *
1252 * If we considered it to be reused, don't go back to ring-3
1253 * to emulate failed instructions since we usually cannot
1254 * interpret them. This may be a bit risky, in which case
1255 * the reuse detection must be fixed.
1256 */
1257 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1258 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1259 && fReused)
1260 {
1261 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1262 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1263 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1264 }
1265 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1266 pgmUnlock(pVM);
1267 return rc;
1268}
1269
1270#endif /* !IN_RING3 */
1271
1272/**
1273 * @callback_method_impl{FNPGMPHYSHANDLER,
1274 * Access handler for shadowed page table pages.}
1275 *
1276 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1277 */
1278PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1279pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1280 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1281{
1282 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1283 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1284 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1285 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1286 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1287
1288 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1289
1290 pgmLock(pVM);
1291
1292#ifdef VBOX_WITH_STATISTICS
1293 /*
1294 * Collect stats on the access.
1295 */
1296 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1297 if (cbBuf <= 16 && cbBuf > 0)
1298 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1299 else if (cbBuf >= 17 && cbBuf < 32)
1300 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1301 else if (cbBuf >= 32 && cbBuf < 64)
1302 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1303 else if (cbBuf >= 64)
1304 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1305
1306 uint8_t cbAlign;
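    /* cbAlign is an alignment mask (entry size minus one): 3 for 4-byte 32-bit entries, 7 for 8-byte PAE/long-mode entries; used below to count misaligned writes. */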
1307 switch (pPage->enmKind)
1308 {
1309 default:
1310 cbAlign = 7;
1311 break;
1312 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1313 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1314 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1315 case PGMPOOLKIND_32BIT_PD:
1316 case PGMPOOLKIND_32BIT_PD_PHYS:
1317 cbAlign = 3;
1318 break;
1319 }
1320 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1321 if ((uint8_t)GCPhys & cbAlign)
1322 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1323#endif
1324
1325 /*
1326 * Make sure the pool page wasn't modified by a different CPU.
1327 */
1328 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1329 {
1330 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1331
1332 /* The max modification count before flushing depends on the context and page type. */
1333#ifdef IN_RING3
1334 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1335#else
1336 uint16_t cMaxModifications;
1337 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1338 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1339 cMaxModifications = 4;
1340 else
1341 cMaxModifications = 24;
1342#endif
1343
1344 /*
1345 * We don't have to be very sophisticated about this since there are relatively few calls here.
1346 * However, we must try our best to detect any non-CPU accesses (disk / networking).
1347 */
1348 if ( ( pPage->cModifications < cMaxModifications
1349 || pgmPoolIsPageLocked(pPage) )
1350 && enmOrigin != PGMACCESSORIGIN_DEVICE
1351 && cbBuf <= 16)
1352 {
1353 /* Clear the shadow entry. */
1354 if (!pPage->cModifications++)
1355 pgmPoolMonitorModifiedInsert(pPool, pPage);
1356
1357 if (cbBuf <= 8)
1358 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1359 else
1360 {
1361 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1362 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1363 }
1364 }
1365 else
1366 pgmPoolMonitorChainFlush(pPool, pPage);
1367
1368 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1369 }
1370 else
1371 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1372 pgmUnlock(pVM);
1373 return VINF_PGM_HANDLER_DO_DEFAULT;
1374}
1375
1376
1377#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1378
1379# if defined(VBOX_STRICT) && !defined(IN_RING3)
1380
1381/**
1382 * Check references to guest physical memory in a PAE / PAE page table.
1383 *
1384 * @param pPool The pool.
1385 * @param pPage The page.
1386 * @param pShwPT The shadow page table (mapping of the page).
1387 * @param pGstPT The guest page table.
1388 */
1389static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1390{
1391 unsigned cErrors = 0;
1392 int LastRc = -1; /* initialized to shut up gcc */
1393 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1394 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1395 PVMCC pVM = pPool->CTX_SUFF(pVM);
1396
1397# ifdef VBOX_STRICT
1398 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1399 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1400# endif
1401 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1402 {
1403 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1404 {
1405 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1406 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1407 if ( rc != VINF_SUCCESS
1408 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1409 {
1410 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1411 LastPTE = i;
1412 LastRc = rc;
1413 LastHCPhys = HCPhys;
1414 cErrors++;
1415
1416 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1417 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1418 AssertRC(rc);
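                /* On a mismatch, scan the whole pool for other PAE shadow PTs that still map this page table's physical page read/write and log them; such writable aliases are a plausible explanation for the stale entry. */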
1419
1420 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1421 {
1422 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1423
1424 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1425 {
1426 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1427
1428 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1429 {
1430 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1431 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1432 {
1433 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1434 }
1435 }
1436
1437 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1438 }
1439 }
1440 }
1441 }
1442 }
1443 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1444}
1445
1446
1447/**
1448 * Check references to guest physical memory in a PAE / 32-bit page table.
1449 *
1450 * @param pPool The pool.
1451 * @param pPage The page.
1452 * @param pShwPT The shadow page table (mapping of the page).
1453 * @param pGstPT The guest page table.
1454 */
1455static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1456{
1457 unsigned cErrors = 0;
1458 int LastRc = -1; /* initialized to shut up gcc */
1459 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1460 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1461 PVMCC pVM = pPool->CTX_SUFF(pVM);
1462
1463# ifdef VBOX_STRICT
1464 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1465 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1466# endif
1467 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1468 {
1469 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1470 {
1471 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1472 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1473 if ( rc != VINF_SUCCESS
1474 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1475 {
1476 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1477 LastPTE = i;
1478 LastRc = rc;
1479 LastHCPhys = HCPhys;
1480 cErrors++;
1481
1482 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1483 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1484 AssertRC(rc);
1485
1486 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1487 {
1488 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1489
1490 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1491 {
1492 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1493
1494 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1495 {
1496 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1497 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1498 {
1499 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1500 }
1501 }
1502
1503 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1504 }
1505 }
1506 }
1507 }
1508 }
1509 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1510}
1511
1512# endif /* VBOX_STRICT && !IN_RING3 */
1513
1514/**
1515 * Clear references to guest physical memory in a PAE / PAE page table.
1516 *
1517 * @returns Number of changed PTEs.
1518 * @param pPool The pool.
1519 * @param pPage The page.
1520 * @param pShwPT The shadow page table (mapping of the page).
1521 * @param pGstPT The guest page table.
1522 * @param pOldGstPT The old cached guest page table.
1523 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1524 * @param pfFlush Flush reused page table (out)
1525 */
1526DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1527 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1528{
1529 unsigned cChanged = 0;
1530
1531# ifdef VBOX_STRICT
1532 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1533 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1534# endif
1535 *pfFlush = false;
1536
1537 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1538 {
1539 /* Check the new value written by the guest. If present and with a bogus physical address, then
1540 * it's fairly safe to assume the guest is reusing the PT.
1541 */
1542 if ( fAllowRemoval
1543 && pGstPT->a[i].n.u1Present)
1544 {
1545 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1546 {
1547 *pfFlush = true;
1548 return ++cChanged;
1549 }
1550 }
1551 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1552 {
1553 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1554 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1555 {
1556# ifdef VBOX_STRICT
1557 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1558 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1559 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1560# endif
1561 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1562 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1563 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1564 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1565
1566 if ( uHostAttr == uGuestAttr
1567 && fHostRW <= fGuestRW)
1568 continue;
1569 }
1570 cChanged++;
1571 /* Something was changed, so flush it. */
1572            Log4(("pgmPoolTrackFlushPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1573 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1574 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1575 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1576 }
1577 }
1578 return cChanged;
1579}
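
/*
 * Illustrative sketch (not part of the original source): the "keep or flush"
 * decision above compares guest and shadow PTE attributes after masking out
 * the bits the pool is allowed to differ on.  The shadow entry may be more
 * restrictive than the guest entry (read-only while the guest says
 * read/write, e.g. for dirty-bit tracking), hence fHostRW <= fGuestRW rather
 * than strict equality.  The standalone model below uses plain integers and
 * hypothetical EX_* names; it only shows the idea.
 */
#if 0 /* simplified model, for illustration only */
#include <stdbool.h>
#include <stdint.h>

#define EX_P   UINT64_C(0x0000000000000001)   /* present */
#define EX_RW  UINT64_C(0x0000000000000002)   /* writable */
#define EX_US  UINT64_C(0x0000000000000004)   /* user/supervisor */
#define EX_A   UINT64_C(0x0000000000000020)   /* accessed */
#define EX_D   UINT64_C(0x0000000000000040)   /* dirty */
#define EX_G   UINT64_C(0x0000000000000100)   /* global */
#define EX_NX  UINT64_C(0x8000000000000000)   /* no-execute */

#define EX_ATTR_MASK (EX_P | EX_US | EX_A | EX_D | EX_G | EX_NX)

static bool examplePteCanBeKept(uint64_t uShwPte, uint64_t uGstPte)
{
    uint64_t const uShwAttr = uShwPte & EX_ATTR_MASK;
    uint64_t const uGstAttr = uGstPte & EX_ATTR_MASK;
    bool     const fShwRW   = (uShwPte & EX_RW) != 0;
    bool     const fGstRW   = (uGstPte & EX_RW) != 0;
    /* Identical attributes and the shadow copy is not more permissive. */
    return uShwAttr == uGstAttr && fShwRW <= fGstRW;
}
#endif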
1580
1581
1582/**
1583 * Clear references to guest physical memory in a PAE / PAE page table.
1584 *
1585 * @returns nr of changed PTEs
1586 * @param pPool The pool.
1587 * @param pPage The page.
1588 * @param pShwPT The shadow page table (mapping of the page).
1589 * @param pGstPT The guest page table.
1590 * @param pOldGstPT The old cached guest page table.
1591 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1592 * @param pfFlush Flush reused page table (out)
1593 */
1594DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1595 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1596{
1597 unsigned cChanged = 0;
1598
1599# ifdef VBOX_STRICT
1600 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1601 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1602# endif
1603 *pfFlush = false;
1604
1605 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1606 {
1607 /* Check the new value written by the guest. If present and with a bogus physical address, then
1608 * it's fairly safe to assume the guest is reusing the PT.
1609 */
1610 if ( fAllowRemoval
1611 && pGstPT->a[i].n.u1Present)
1612 {
1613 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1614 {
1615 *pfFlush = true;
1616 return ++cChanged;
1617 }
1618 }
1619 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1620 {
1621 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1622 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1623 {
1624# ifdef VBOX_STRICT
1625                RTHCPHYS HCPhys = NIL_RTHCPHYS;
1626 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1627 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1628# endif
1629 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1630 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1631 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1632 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1633
1634 if ( uHostAttr == uGuestAttr
1635 && fHostRW <= fGuestRW)
1636 continue;
1637 }
1638 cChanged++;
1639 /* Something was changed, so flush it. */
1640            Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1641 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1642 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1643 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1644 }
1645 }
1646 return cChanged;
1647}
1648
1649
1650/**
1651 * Flush a dirty page
1652 *
1653 * @param pVM The cross context VM structure.
1654 * @param pPool The pool.
1655 * @param idxSlot Dirty array slot index
1656 * @param fAllowRemoval Allow a reused page table to be removed
1657 */
1658static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1659{
1660 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1661
1662 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1663 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1664 if (idxPage == NIL_PGMPOOL_IDX)
1665 return;
1666
1667 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1668 Assert(pPage->idx == idxPage);
1669 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1670
1671 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1672 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1673
1674# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1675 PVMCPU pVCpu = VMMGetCpu(pVM);
1676 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1677# endif
1678
1679 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1680 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1681 Assert(rc == VINF_SUCCESS);
1682 pPage->fDirty = false;
1683
1684# ifdef VBOX_STRICT
1685 uint64_t fFlags = 0;
1686 RTHCPHYS HCPhys;
1687 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1688 AssertMsg( ( rc == VINF_SUCCESS
1689 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1690 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1691 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1692 || rc == VERR_PAGE_NOT_PRESENT,
1693 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1694# endif
1695
1696 /* Flush those PTEs that have changed. */
1697 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1698 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1699 void *pvGst;
1700 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1701 bool fFlush;
1702 unsigned cChanges;
1703
1704 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1705 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1706 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1707 else
1708 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1709 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1710
1711 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1712 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1713 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1714 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1715
1716 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1717 Assert(pPage->cModifications);
1718 if (cChanges < 4)
1719 pPage->cModifications = 1; /* must use > 0 here */
1720 else
1721 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1722
1723 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1724 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1725 pPool->idxFreeDirtyPage = idxSlot;
1726
1727 pPool->cDirtyPages--;
1728 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1729 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1730 if (fFlush)
1731 {
1732 Assert(fAllowRemoval);
1733 Log(("Flush reused page table!\n"));
1734 pgmPoolFlushPage(pPool, pPage);
1735 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1736 }
1737 else
1738 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1739
1740# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1741 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1742# endif
1743}
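
/*
 * Illustrative note (not part of the original source): the dirty page flush
 * above works in three steps: (1) re-arm the physical write handler so new
 * guest writes are caught again, (2) compare the current guest PT against the
 * copy saved by pgmPoolAddDirtyPage and dereference/clear only the shadow
 * PTEs whose guest entries actually changed, and (3) if the guest appears to
 * have reused the page for something else entirely, throw the whole shadow
 * page away instead (fFlush / pgmPoolFlushPage).
 */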
1744
1745
1746# ifndef IN_RING3
1747/**
1748 * Add a new dirty page
1749 *
1750 * @param pVM The cross context VM structure.
1751 * @param pPool The pool.
1752 * @param pPage The page.
1753 */
1754void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1755{
1756 PGM_LOCK_ASSERT_OWNER(pVM);
1757 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1758 Assert(!pPage->fDirty);
1759
1760 unsigned idxFree = pPool->idxFreeDirtyPage;
1761 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1762 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1763
1764 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1765 {
1766 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1767 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1768 }
1769 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1770 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1771
1772 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1773
1774 /*
1775 * Make a copy of the guest page table as we require valid GCPhys addresses
1776 * when removing references to physical pages.
1777 * (The HCPhys linear lookup is *extremely* expensive!)
1778 */
1779 void *pvGst;
1780 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1781 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1782# ifdef VBOX_STRICT
1783 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1784 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1785 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1786 else
1787 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1788 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1789# endif
1790 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1791
1792 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1793 pPage->fDirty = true;
1794 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1795 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1796 pPool->cDirtyPages++;
1797
1798 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1799 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1800 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1801 {
1802 unsigned i;
1803 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1804 {
1805 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1806 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1807 {
1808 pPool->idxFreeDirtyPage = idxFree;
1809 break;
1810 }
1811 }
1812 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1813 }
1814
1815 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1816
1817 /*
1818 * Clear all references to this shadow table. See @bugref{7298}.
1819 */
1820 pgmPoolTrackClearPageUsers(pPool, pPage);
1821}
1822# endif /* !IN_RING3 */
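
/*
 * Illustrative sketch (not part of the original source): the dirty-page set
 * above holds at most 8 or 16 entries (see the AssertCompile), so the next
 * free slot can be found with a cheap power-of-two wrap-around rather than a
 * modulo, as in the search loop above.  A minimal standalone model of that
 * scan, using hypothetical EXAMPLE_* names:
 */
#if 0 /* simplified model, for illustration only */
#include <stdint.h>

#define EXAMPLE_NIL_IDX     UINT16_MAX
#define EXAMPLE_SLOTS       16          /* must be a power of two */

/* Returns the first free slot at or after idxStart, or EXAMPLE_NIL_IDX if all are occupied. */
static unsigned exampleFindFreeSlot(uint16_t const paSlots[EXAMPLE_SLOTS], unsigned idxStart)
{
    for (unsigned i = 0; i < EXAMPLE_SLOTS; i++)
    {
        unsigned idx = (idxStart + i) & (EXAMPLE_SLOTS - 1); /* wrap around */
        if (paSlots[idx] == EXAMPLE_NIL_IDX)
            return idx;
    }
    return EXAMPLE_NIL_IDX;
}
#endif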
1823
1824
1825/**
1826 * Check if the specified page is dirty (not write monitored)
1827 *
1828 * @return dirty or not
1829 * @param pVM The cross context VM structure.
1830 * @param GCPhys Guest physical address
1831 */
1832bool pgmPoolIsDirtyPageSlow(PVM pVM, RTGCPHYS GCPhys)
1833{
1834 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1835 PGM_LOCK_ASSERT_OWNER(pVM);
1836 if (!pPool->cDirtyPages)
1837 return false;
1838
1839 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1840
1841 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1842 {
1843 unsigned idxPage = pPool->aidxDirtyPages[i];
1844 if (idxPage != NIL_PGMPOOL_IDX)
1845 {
1846 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1847 if (pPage->GCPhys == GCPhys)
1848 return true;
1849 }
1850 }
1851 return false;
1852}
1853
1854
1855/**
1856 * Reset all dirty pages by reinstating page monitoring.
1857 *
1858 * @param pVM The cross context VM structure.
1859 */
1860void pgmPoolResetDirtyPages(PVMCC pVM)
1861{
1862 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1863 PGM_LOCK_ASSERT_OWNER(pVM);
1864 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1865
1866 if (!pPool->cDirtyPages)
1867 return;
1868
1869 Log(("pgmPoolResetDirtyPages\n"));
1870 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1871 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1872
1873 pPool->idxFreeDirtyPage = 0;
1874 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1875 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1876 {
1877 unsigned i;
1878 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1879 {
1880 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1881 {
1882 pPool->idxFreeDirtyPage = i;
1883 break;
1884 }
1885 }
1886 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1887 }
1888
1889 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1890 return;
1891}
1892
1893
1894/**
1895 * Invalidate the PT entry for the specified page
1896 *
1897 * @param pVM The cross context VM structure.
1898 * @param GCPtrPage Guest page to invalidate
1899 */
1900void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1901{
1902 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1903 PGM_LOCK_ASSERT_OWNER(pVM);
1904 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1905
1906 if (!pPool->cDirtyPages)
1907 return;
1908
1909 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
1910 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1911 {
1912 /** @todo What was intended here??? This looks incomplete... */
1913 }
1914}
1915
1916
1917/**
1918 * Flushes the dirty state of the specified page table, reinstating its page monitoring.
1919 *
1920 * @param pVM The cross context VM structure.
1921 * @param GCPhysPT Physical address of the page table
1922 */
1923void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
1924{
1925 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1926 PGM_LOCK_ASSERT_OWNER(pVM);
1927 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1928 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1929
1930 if (!pPool->cDirtyPages)
1931 return;
1932
1933 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1934
1935 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1936 {
1937 unsigned idxPage = pPool->aidxDirtyPages[i];
1938 if (idxPage != NIL_PGMPOOL_IDX)
1939 {
1940 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1941 if (pPage->GCPhys == GCPhysPT)
1942 {
1943 idxDirtyPage = i;
1944 break;
1945 }
1946 }
1947 }
1948
1949 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1950 {
1951 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1952 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1953 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1954 {
1955 unsigned i;
1956 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1957 {
1958 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1959 {
1960 pPool->idxFreeDirtyPage = i;
1961 break;
1962 }
1963 }
1964 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1965 }
1966 }
1967}
1968
1969#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1970
1971/**
1972 * Inserts a page into the GCPhys hash table.
1973 *
1974 * @param pPool The pool.
1975 * @param pPage The page.
1976 */
1977DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1978{
1979 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1980 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1981 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1982 pPage->iNext = pPool->aiHash[iHash];
1983 pPool->aiHash[iHash] = pPage->idx;
1984}
1985
1986
1987/**
1988 * Removes a page from the GCPhys hash table.
1989 *
1990 * @param pPool The pool.
1991 * @param pPage The page.
1992 */
1993DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1994{
1995 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1996 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1997 if (pPool->aiHash[iHash] == pPage->idx)
1998 pPool->aiHash[iHash] = pPage->iNext;
1999 else
2000 {
2001 uint16_t iPrev = pPool->aiHash[iHash];
2002 for (;;)
2003 {
2004 const int16_t i = pPool->aPages[iPrev].iNext;
2005 if (i == pPage->idx)
2006 {
2007 pPool->aPages[iPrev].iNext = pPage->iNext;
2008 break;
2009 }
2010 if (i == NIL_PGMPOOL_IDX)
2011 {
2012 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2013 break;
2014 }
2015 iPrev = i;
2016 }
2017 }
2018 pPage->iNext = NIL_PGMPOOL_IDX;
2019}
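
/*
 * Illustrative sketch (not part of the original source): the GCPhys hash uses
 * page indices rather than pointers; each bucket in aiHash holds the index of
 * the first page and the chain continues through the pages' iNext fields, as
 * in the insert/remove routines above.  A standalone miniature lookup over
 * such an index-linked chain could look like this (all EX_* names and the
 * hash function are made up for the example):
 */
#if 0 /* simplified model, for illustration only */
#include <stdint.h>

#define EX_NIL          UINT16_MAX
#define EX_HASH_SIZE    64
#define EX_HASH(GCPhys) ((unsigned)(((GCPhys) >> 12) & (EX_HASH_SIZE - 1)))

typedef struct EXPAGE { uint64_t GCPhys; uint16_t iNext; } EXPAGE;

static uint16_t exampleHashLookup(uint16_t const aiHash[EX_HASH_SIZE], EXPAGE const *paPages, uint64_t GCPhys)
{
    uint16_t i = aiHash[EX_HASH(GCPhys)];
    while (i != EX_NIL)                     /* walk the per-bucket chain */
    {
        if (paPages[i].GCPhys == GCPhys)
            return i;                       /* found it */
        i = paPages[i].iNext;
    }
    return EX_NIL;                          /* not in the hash */
}
#endif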
2020
2021
2022/**
2023 * Frees up one cache page.
2024 *
2025 * @returns VBox status code.
2026 * @retval VINF_SUCCESS on success.
2027 * @param pPool The pool.
2028 * @param iUser The user index.
2029 */
2030static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2031{
2032 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2033    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
2034 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2035
2036 /*
2037 * Select one page from the tail of the age list.
2038 */
2039 PPGMPOOLPAGE pPage;
2040 for (unsigned iLoop = 0; ; iLoop++)
2041 {
2042 uint16_t iToFree = pPool->iAgeTail;
2043 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2044 iToFree = pPool->aPages[iToFree].iAgePrev;
2045/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2046 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2047 {
2048 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2049 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2050 {
2051 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2052 continue;
2053 iToFree = i;
2054 break;
2055 }
2056 }
2057*/
2058 Assert(iToFree != iUser);
2059 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2060 pPage = &pPool->aPages[iToFree];
2061
2062 /*
2063 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2064 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2065 */
2066 if ( !pgmPoolIsPageLocked(pPage)
2067 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2068 break;
2069 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2070 pgmPoolCacheUsed(pPool, pPage);
2071 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2072 }
2073
2074 /*
2075 * Found a usable page, flush it and return.
2076 */
2077 int rc = pgmPoolFlushPage(pPool, pPage);
2078 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2079 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2080 if (rc == VINF_SUCCESS)
2081 PGM_INVL_ALL_VCPU_TLBS(pVM);
2082 return rc;
2083}
2084
2085
2086/**
2087 * Checks if a kind mismatch is really a page being reused
2088 * or if it's just a normal remapping.
2089 *
2090 * @returns true if reused and the cached page (enmKind1) should be flushed
2091 * @returns false if not reused.
2092 * @param enmKind1 The kind of the cached page.
2093 * @param enmKind2 The kind of the requested page.
2094 */
2095static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2096{
2097 switch (enmKind1)
2098 {
2099 /*
2100 * Never reuse them. There is no remapping in non-paging mode.
2101 */
2102 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2103 case PGMPOOLKIND_32BIT_PD_PHYS:
2104 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2105 case PGMPOOLKIND_PAE_PD_PHYS:
2106 case PGMPOOLKIND_PAE_PDPT_PHYS:
2107 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2108 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2109 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2110 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2111 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2112 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2113 return false;
2114
2115 /*
2116 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2117 */
2118 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2119 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2120 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2121 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2122 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2123 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2124 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2125 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2126 case PGMPOOLKIND_32BIT_PD:
2127 case PGMPOOLKIND_PAE_PDPT:
2128 switch (enmKind2)
2129 {
2130 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2131 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2132 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2133 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2134 case PGMPOOLKIND_64BIT_PML4:
2135 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2136 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2137 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2138 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2139 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2140 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2141 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2142 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2143 return true;
2144 default:
2145 return false;
2146 }
2147
2148 /*
2149 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2150 */
2151 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2152 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2153 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2154 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2155 case PGMPOOLKIND_64BIT_PML4:
2156 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2157 switch (enmKind2)
2158 {
2159 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2160 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2161 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2162 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2163 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2164 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2165 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2166 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2167 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2168 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2169 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2170 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2171 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2172 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2173 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2174 return true;
2175 default:
2176 return false;
2177 }
2178
2179 /*
2180 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2181 */
2182 case PGMPOOLKIND_ROOT_NESTED:
2183 return false;
2184
2185 default:
2186 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2187 }
2188}
2189
2190
2191/**
2192 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2193 *
2194 * @returns VBox status code.
2195 * @retval VINF_PGM_CACHED_PAGE on success.
2196 * @retval VERR_FILE_NOT_FOUND if not found.
2197 * @param pPool The pool.
2198 * @param GCPhys The GC physical address of the page we're gonna shadow.
2199 * @param enmKind The kind of mapping.
2200 * @param enmAccess Access type for the mapping (only relevant for big pages)
2201 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2202 * @param iUser The shadow page pool index of the user table. This is
2203 * NIL_PGMPOOL_IDX for root pages.
2204 * @param iUserTable The index into the user table (shadowed). Ignored if
2205 * root page
2206 * @param ppPage Where to store the pointer to the page.
2207 */
2208static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2209 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2210{
2211 /*
2212 * Look up the GCPhys in the hash.
2213 */
2214 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2215 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2216 if (i != NIL_PGMPOOL_IDX)
2217 {
2218 do
2219 {
2220 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2221 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2222 if (pPage->GCPhys == GCPhys)
2223 {
2224 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2225 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2226 && pPage->fA20Enabled == fA20Enabled)
2227 {
2228                    /* Put it at the start of the age list to make sure pgmPoolTrackAddUser
2229                     * doesn't flush it in case there are no more free user records.
2230 */
2231 pgmPoolCacheUsed(pPool, pPage);
2232
2233 int rc = VINF_SUCCESS;
2234 if (iUser != NIL_PGMPOOL_IDX)
2235 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2236 if (RT_SUCCESS(rc))
2237 {
2238 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2239 *ppPage = pPage;
2240 if (pPage->cModifications)
2241 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2242 STAM_COUNTER_INC(&pPool->StatCacheHits);
2243 return VINF_PGM_CACHED_PAGE;
2244 }
2245 return rc;
2246 }
2247
2248 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2249 {
2250 /*
2251 * The kind is different. In some cases we should now flush the page
2252 * as it has been reused, but in most cases this is normal remapping
2253 * of PDs as PT or big pages using the GCPhys field in a slightly
2254 * different way than the other kinds.
2255 */
2256 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2257 {
2258 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2259 pgmPoolFlushPage(pPool, pPage);
2260 break;
2261 }
2262 }
2263 }
2264
2265 /* next */
2266 i = pPage->iNext;
2267 } while (i != NIL_PGMPOOL_IDX);
2268 }
2269
2270 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2271 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2272 return VERR_FILE_NOT_FOUND;
2273}
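
/*
 * Illustrative note (not part of the original source): as the doc comment
 * above says, this lookup serves pgmPoolAlloc requests (further down in this
 * file).  The caller typically distinguishes three outcomes:
 *   - VINF_PGM_CACHED_PAGE: the existing shadow page is reused; a user record
 *     for iUser/iUserTable has already been added and no new pool page is
 *     consumed.
 *   - VERR_FILE_NOT_FOUND:  plain cache miss; a fresh shadow page is
 *     allocated, initialized and inserted with pgmPoolTrackInsert().
 *   - any other failure (e.g. no free user record could be made available)
 *     is passed up to the caller.
 */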
2274
2275
2276/**
2277 * Inserts a page into the cache.
2278 *
2279 * @param pPool The pool.
2280 * @param pPage The cached page.
2281 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2282 */
2283static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2284{
2285 /*
2286 * Insert into the GCPhys hash if the page is fit for that.
2287 */
2288 Assert(!pPage->fCached);
2289 if (fCanBeCached)
2290 {
2291 pPage->fCached = true;
2292 pgmPoolHashInsert(pPool, pPage);
2293 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2294 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2295 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2296 }
2297 else
2298 {
2299 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2300 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2301 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2302 }
2303
2304 /*
2305 * Insert at the head of the age list.
2306 */
2307 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2308 pPage->iAgeNext = pPool->iAgeHead;
2309 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2310 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2311 else
2312 pPool->iAgeTail = pPage->idx;
2313 pPool->iAgeHead = pPage->idx;
2314}
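
/*
 * Illustrative sketch (not part of the original source): the age list is an
 * LRU list linked by page indices (iAgePrev/iAgeNext) with head and tail
 * indices kept in the pool, as the insert above shows.  Below is a standalone
 * miniature of the "move to the head" step performed when a cached page gets
 * reused (pgmPoolCacheUsed); this is only a schematic of the idea, not the
 * actual implementation, and all EX* names are made up.
 */
#if 0 /* simplified model, for illustration only */
#include <stdint.h>

#define EX_NIL UINT16_MAX

typedef struct EXNODE { uint16_t iAgePrev, iAgeNext; } EXNODE;
typedef struct EXLIST { uint16_t iAgeHead, iAgeTail; EXNODE *paNodes; } EXLIST;

static void exampleAgeListMoveToHead(EXLIST *pList, uint16_t idx)
{
    EXNODE *paNodes = pList->paNodes;
    if (pList->iAgeHead == idx)
        return;                                     /* already most recently used */

    /* Unlink the node from its current position. */
    if (paNodes[idx].iAgePrev != EX_NIL)
        paNodes[paNodes[idx].iAgePrev].iAgeNext = paNodes[idx].iAgeNext;
    if (paNodes[idx].iAgeNext != EX_NIL)
        paNodes[paNodes[idx].iAgeNext].iAgePrev = paNodes[idx].iAgePrev;
    else
        pList->iAgeTail = paNodes[idx].iAgePrev;    /* it was the tail */

    /* Re-insert it at the head. */
    paNodes[idx].iAgePrev = EX_NIL;
    paNodes[idx].iAgeNext = pList->iAgeHead;
    if (pList->iAgeHead != EX_NIL)
        paNodes[pList->iAgeHead].iAgePrev = idx;
    else
        pList->iAgeTail = idx;                      /* list was empty */
    pList->iAgeHead = idx;
}
#endif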
2315
2316
2317/**
2318 * Flushes a cached page.
2319 *
2320 * @param pPool The pool.
2321 * @param pPage The cached page.
2322 */
2323static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2324{
2325 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2326
2327 /*
2328 * Remove the page from the hash.
2329 */
2330 if (pPage->fCached)
2331 {
2332 pPage->fCached = false;
2333 pgmPoolHashRemove(pPool, pPage);
2334 }
2335 else
2336 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2337
2338 /*
2339 * Remove it from the age list.
2340 */
2341 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2342 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2343 else
2344 pPool->iAgeTail = pPage->iAgePrev;
2345 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2346 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2347 else
2348 pPool->iAgeHead = pPage->iAgeNext;
2349 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2350 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2351}
2352
2353
2354/**
2355 * Looks for pages sharing the monitor.
2356 *
2357 * @returns Pointer to the head page.
2358 * @returns NULL if not found.
2359 * @param pPool The Pool
2360 * @param pNewPage The page which is going to be monitored.
2361 */
2362static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2363{
2364 /*
2365 * Look up the GCPhys in the hash.
2366 */
2367 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2368 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2369 if (i == NIL_PGMPOOL_IDX)
2370 return NULL;
2371 do
2372 {
2373 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2374 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2375 && pPage != pNewPage)
2376 {
2377 switch (pPage->enmKind)
2378 {
2379 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2380 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2381 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2382 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2383 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2384 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2385 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2386 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2387 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2388 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2389 case PGMPOOLKIND_64BIT_PML4:
2390 case PGMPOOLKIND_32BIT_PD:
2391 case PGMPOOLKIND_PAE_PDPT:
2392 {
2393 /* find the head */
2394 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2395 {
2396 Assert(pPage->iMonitoredPrev != pPage->idx);
2397 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2398 }
2399 return pPage;
2400 }
2401
2402 /* ignore, no monitoring. */
2403 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2404 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2405 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2406 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2407 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2408 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2409 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2410 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2411 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2412 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2413 case PGMPOOLKIND_ROOT_NESTED:
2414 case PGMPOOLKIND_PAE_PD_PHYS:
2415 case PGMPOOLKIND_PAE_PDPT_PHYS:
2416 case PGMPOOLKIND_32BIT_PD_PHYS:
2417 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2418 break;
2419 default:
2420 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2421 }
2422 }
2423
2424 /* next */
2425 i = pPage->iNext;
2426 } while (i != NIL_PGMPOOL_IDX);
2427 return NULL;
2428}
2429
2430
2431/**
2432 * Enables write monitoring of a guest page.
2433 *
2434 * @returns VBox status code.
2435 * @retval VINF_SUCCESS on success.
2436 * @param pPool The pool.
2437 * @param pPage The cached page.
2438 */
2439static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2440{
2441 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2442
2443 /*
2444 * Filter out the relevant kinds.
2445 */
2446 switch (pPage->enmKind)
2447 {
2448 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2449 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2450 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2451 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2452 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2453 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2454 case PGMPOOLKIND_64BIT_PML4:
2455 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2456 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2457 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2458 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2459 case PGMPOOLKIND_32BIT_PD:
2460 case PGMPOOLKIND_PAE_PDPT:
2461 break;
2462
2463 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2464 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2465 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2466 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2467 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2468 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2469 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2470 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2471 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2472 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2473 case PGMPOOLKIND_ROOT_NESTED:
2474 /* Nothing to monitor here. */
2475 return VINF_SUCCESS;
2476
2477 case PGMPOOLKIND_32BIT_PD_PHYS:
2478 case PGMPOOLKIND_PAE_PDPT_PHYS:
2479 case PGMPOOLKIND_PAE_PD_PHYS:
2480 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2481 /* Nothing to monitor here. */
2482 return VINF_SUCCESS;
2483 default:
2484 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2485 }
2486
2487 /*
2488 * Install handler.
2489 */
2490 int rc;
2491 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2492 if (pPageHead)
2493 {
2494 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2495 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2496
2497#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2498 if (pPageHead->fDirty)
2499 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2500#endif
2501
2502 pPage->iMonitoredPrev = pPageHead->idx;
2503 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2504 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2505 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2506 pPageHead->iMonitoredNext = pPage->idx;
2507 rc = VINF_SUCCESS;
2508 }
2509 else
2510 {
2511 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2512 PVMCC pVM = pPool->CTX_SUFF(pVM);
2513 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2514 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2515 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2516 NIL_RTR3PTR /*pszDesc*/);
2517 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2518 * the heap size should suffice. */
2519 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2520 PVMCPU pVCpu = VMMGetCpu(pVM);
2521 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2522 }
2523 pPage->fMonitored = true;
2524 return rc;
2525}
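
/*
 * Illustrative note (not part of the original source): only the head of a
 * monitored chain owns the physical access handler registration; additional
 * shadow pages for the same guest page are just linked in behind the head via
 * iMonitoredPrev/iMonitoredNext, as done above.  Schematically, for one guest
 * page shadowed by three pool pages:
 *
 *      handler(GCPhys & ~PAGE_OFFSET_MASK)
 *          |
 *      [head idx=A] <-> [idx=B] <-> [idx=C]      (iMonitoredNext / iMonitoredPrev)
 *
 * When the head is freed, pgmPoolMonitorFlush below hands the handler's user
 * arguments over to the next page in the chain instead of deregistering it.
 */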
2526
2527
2528/**
2529 * Disables write monitoring of a guest page.
2530 *
2531 * @returns VBox status code.
2532 * @retval VINF_SUCCESS on success.
2533 * @param pPool The pool.
2534 * @param pPage The cached page.
2535 */
2536static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2537{
2538 /*
2539 * Filter out the relevant kinds.
2540 */
2541 switch (pPage->enmKind)
2542 {
2543 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2544 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2545 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2546 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2547 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2548 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2549 case PGMPOOLKIND_64BIT_PML4:
2550 case PGMPOOLKIND_32BIT_PD:
2551 case PGMPOOLKIND_PAE_PDPT:
2552 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2553 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2554 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2555 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2556 break;
2557
2558 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2559 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2560 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2561 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2562 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2563 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2564 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2565 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2566 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2567 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2568 case PGMPOOLKIND_ROOT_NESTED:
2569 case PGMPOOLKIND_PAE_PD_PHYS:
2570 case PGMPOOLKIND_PAE_PDPT_PHYS:
2571 case PGMPOOLKIND_32BIT_PD_PHYS:
2572 /* Nothing to monitor here. */
2573 Assert(!pPage->fMonitored);
2574 return VINF_SUCCESS;
2575
2576 default:
2577 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2578 }
2579 Assert(pPage->fMonitored);
2580
2581 /*
2582 * Remove the page from the monitored list or uninstall it if last.
2583 */
2584 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2585 int rc;
2586 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2587 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2588 {
2589 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2590 {
2591 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2592 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2593 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2594 MMHyperCCToR3(pVM, pNewHead), MMHyperCCToR0(pVM, pNewHead));
2595
2596 AssertFatalRCSuccess(rc);
2597 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2598 }
2599 else
2600 {
2601 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2602 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2603 {
2604 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2605 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2606 }
2607 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2608 rc = VINF_SUCCESS;
2609 }
2610 }
2611 else
2612 {
2613 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2614 AssertFatalRC(rc);
2615 PVMCPU pVCpu = VMMGetCpu(pVM);
2616 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2617 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2618 }
2619 pPage->fMonitored = false;
2620
2621 /*
2622 * Remove it from the list of modified pages (if in it).
2623 */
2624 pgmPoolMonitorModifiedRemove(pPool, pPage);
2625
2626 return rc;
2627}
2628
2629
2630/**
2631 * Inserts the page into the list of modified pages.
2632 *
2633 * @param pPool The pool.
2634 * @param pPage The page.
2635 */
2636void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2637{
2638 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2639 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2640 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2641 && pPool->iModifiedHead != pPage->idx,
2642 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2643 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2644 pPool->iModifiedHead, pPool->cModifiedPages));
2645
2646 pPage->iModifiedNext = pPool->iModifiedHead;
2647 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2648 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2649 pPool->iModifiedHead = pPage->idx;
2650 pPool->cModifiedPages++;
2651#ifdef VBOX_WITH_STATISTICS
2652 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2653 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2654#endif
2655}
2656
2657
2658/**
2659 * Removes the page from the list of modified pages and resets the
2660 * modification counter.
2661 *
2662 * @param pPool The pool.
2663 * @param pPage The page which is believed to be in the list of modified pages.
2664 */
2665static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2666{
2667 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2668 if (pPool->iModifiedHead == pPage->idx)
2669 {
2670 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2671 pPool->iModifiedHead = pPage->iModifiedNext;
2672 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2673 {
2674 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2675 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2676 }
2677 pPool->cModifiedPages--;
2678 }
2679 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2680 {
2681 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2682 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2683 {
2684 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2685 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2686 }
2687 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2688 pPool->cModifiedPages--;
2689 }
2690 else
2691 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2692 pPage->cModifications = 0;
2693}
2694
2695
2696/**
2697 * Zaps the list of modified pages, resetting their modification counters in the process.
2698 *
2699 * @param pVM The cross context VM structure.
2700 */
2701static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2702{
2703 pgmLock(pVM);
2704 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2705 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2706
2707 unsigned cPages = 0; NOREF(cPages);
2708
2709#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2710 pgmPoolResetDirtyPages(pVM);
2711#endif
2712
2713 uint16_t idx = pPool->iModifiedHead;
2714 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2715 while (idx != NIL_PGMPOOL_IDX)
2716 {
2717 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2718 idx = pPage->iModifiedNext;
2719 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2720 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2721 pPage->cModifications = 0;
2722 Assert(++cPages);
2723 }
2724 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2725 pPool->cModifiedPages = 0;
2726 pgmUnlock(pVM);
2727}
2728
2729
2730/**
2731 * Handle SyncCR3 pool tasks
2732 *
2733 * @returns VBox status code.
2734 * @retval VINF_SUCCESS if successfully handled.
2735 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2736 * @param pVCpu The cross context virtual CPU structure.
2737 * @remark Should only be used when monitoring is available, thus placed in
2738 * the PGMPOOL_WITH_MONITORING \#ifdef.
2739 */
2740int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2741{
2742 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2743 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2744
2745 /*
2746 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2747 * Occasionally we will have to clear all the shadow page tables because we wanted
2748 * to monitor a page which was mapped by too many shadowed page tables. This operation
2749 * is sometimes referred to as a 'lightweight flush'.
2750 */
2751# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2752 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2753 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2754# else /* !IN_RING3 */
2755 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2756 {
2757 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2758 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2759
2760 /* Make sure all other VCPUs return to ring 3. */
2761 if (pVM->cCpus > 1)
2762 {
2763 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2764 PGM_INVL_ALL_VCPU_TLBS(pVM);
2765 }
2766 return VINF_PGM_SYNC_CR3;
2767 }
2768# endif /* !IN_RING3 */
2769 else
2770 {
2771 pgmPoolMonitorModifiedClearAll(pVM);
2772
2773 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2774 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2775 {
2776 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2777 return pgmPoolSyncCR3(pVCpu);
2778 }
2779 }
2780 return VINF_SUCCESS;
2781}
2782
2783
2784/**
2785 * Frees up at least one user entry.
2786 *
2787 * @returns VBox status code.
2788 * @retval VINF_SUCCESS if successfully freed.
2789 *
2790 * @param pPool The pool.
2791 * @param iUser The user index.
2792 */
2793static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2794{
2795 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2796 /*
2797 * Just free cached pages in a braindead fashion.
2798 */
2799 /** @todo walk the age list backwards and free the first with usage. */
2800 int rc = VINF_SUCCESS;
2801 do
2802 {
2803 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2804 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2805 rc = rc2;
2806 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2807 return rc;
2808}
2809
2810
2811/**
2812 * Inserts a page into the cache.
2813 *
2814 * This will create user node for the page, insert it into the GCPhys
2815 * hash, and insert it into the age list.
2816 *
2817 * @returns VBox status code.
2818 * @retval VINF_SUCCESS if successfully added.
2819 *
2820 * @param pPool The pool.
2821 * @param pPage The cached page.
2822 * @param GCPhys The GC physical address of the page we're gonna shadow.
2823 * @param iUser The user index.
2824 * @param iUserTable The user table index.
2825 */
2826DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2827{
2828 int rc = VINF_SUCCESS;
2829 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2830
2831 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2832
2833 if (iUser != NIL_PGMPOOL_IDX)
2834 {
2835#ifdef VBOX_STRICT
2836 /*
2837         * Check that the entry doesn't already exist.
2838 */
2839 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2840 {
2841 uint16_t i = pPage->iUserHead;
2842 do
2843 {
2844 Assert(i < pPool->cMaxUsers);
2845 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2846 i = paUsers[i].iNext;
2847 } while (i != NIL_PGMPOOL_USER_INDEX);
2848 }
2849#endif
2850
2851 /*
2852         * Find a free user node.
2853 */
2854 uint16_t i = pPool->iUserFreeHead;
2855 if (i == NIL_PGMPOOL_USER_INDEX)
2856 {
2857 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2858 if (RT_FAILURE(rc))
2859 return rc;
2860 i = pPool->iUserFreeHead;
2861 }
2862
2863 /*
2864 * Unlink the user node from the free list,
2865 * initialize and insert it into the user list.
2866 */
2867 pPool->iUserFreeHead = paUsers[i].iNext;
2868 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2869 paUsers[i].iUser = iUser;
2870 paUsers[i].iUserTable = iUserTable;
2871 pPage->iUserHead = i;
2872 }
2873 else
2874 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2875
2876
2877 /*
2878 * Insert into cache and enable monitoring of the guest page if enabled.
2879 *
2880 * Until we implement caching of all levels, including the CR3 one, we'll
2881 * have to make sure we don't try to monitor & cache any recursive reuse of
2882 * a monitored CR3 page. Because all Windows versions do this, we'll
2883 * have to be able to do combined access monitoring, CR3 + PT and
2884 * PD + PT (guest PAE).
2885 *
2886 * Update:
2887 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2888 */
2889 const bool fCanBeMonitored = true;
2890 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2891 if (fCanBeMonitored)
2892 {
2893 rc = pgmPoolMonitorInsert(pPool, pPage);
2894 AssertRC(rc);
2895 }
2896 return rc;
2897}
2898
2899
2900/**
2901 * Adds a user reference to a page.
2902 *
2903 * This will move the page to the head of the age list.
2904 *
2905 * @returns VBox status code.
2906 * @retval VINF_SUCCESS if successfully added.
2907 *
2908 * @param pPool The pool.
2909 * @param pPage The cached page.
2910 * @param iUser The user index.
2911 * @param iUserTable The user table.
2912 */
2913static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2914{
2915 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2916 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2917 Assert(iUser != NIL_PGMPOOL_IDX);
2918
2919# ifdef VBOX_STRICT
2920 /*
2921     * Check that the entry doesn't already exist. We only allow multiple
2922 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2923 */
2924 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2925 {
2926 uint16_t i = pPage->iUserHead;
2927 do
2928 {
2929 Assert(i < pPool->cMaxUsers);
2930 /** @todo this assertion looks odd... Shouldn't it be && here? */
2931 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2932 i = paUsers[i].iNext;
2933 } while (i != NIL_PGMPOOL_USER_INDEX);
2934 }
2935# endif
2936
2937 /*
2938 * Allocate a user node.
2939 */
2940 uint16_t i = pPool->iUserFreeHead;
2941 if (i == NIL_PGMPOOL_USER_INDEX)
2942 {
2943 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2944 if (RT_FAILURE(rc))
2945 return rc;
2946 i = pPool->iUserFreeHead;
2947 }
2948 pPool->iUserFreeHead = paUsers[i].iNext;
2949
2950 /*
2951 * Initialize the user node and insert it.
2952 */
2953 paUsers[i].iNext = pPage->iUserHead;
2954 paUsers[i].iUser = iUser;
2955 paUsers[i].iUserTable = iUserTable;
2956 pPage->iUserHead = i;
2957
2958# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2959 if (pPage->fDirty)
2960 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2961# endif
2962
2963 /*
2964 * Tell the cache to update its replacement stats for this page.
2965 */
2966 pgmPoolCacheUsed(pPool, pPage);
2967 return VINF_SUCCESS;
2968}
2969
2970
2971/**
2972 * Frees a user record associated with a page.
2973 *
2974 * This does not clear the entry in the user table, it simply returns the
2975 * user record to the chain of free records.
2976 *
2977 * @param pPool The pool.
2978 * @param pPage The shadow page.
2979 * @param iUser The shadow page pool index of the user table.
2980 * @param iUserTable The index into the user table (shadowed).
2981 *
2982 * @remarks Don't call this for root pages.
2983 */
2984static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2985{
2986 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2987 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2988 Assert(iUser != NIL_PGMPOOL_IDX);
2989
2990 /*
2991 * Unlink and free the specified user entry.
2992 */
2993
2994 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2995 uint16_t i = pPage->iUserHead;
2996 if ( i != NIL_PGMPOOL_USER_INDEX
2997 && paUsers[i].iUser == iUser
2998 && paUsers[i].iUserTable == iUserTable)
2999 {
3000 pPage->iUserHead = paUsers[i].iNext;
3001
3002 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3003 paUsers[i].iNext = pPool->iUserFreeHead;
3004 pPool->iUserFreeHead = i;
3005 return;
3006 }
3007
3008 /* General: Linear search. */
3009 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3010 while (i != NIL_PGMPOOL_USER_INDEX)
3011 {
3012 if ( paUsers[i].iUser == iUser
3013 && paUsers[i].iUserTable == iUserTable)
3014 {
3015 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3016 paUsers[iPrev].iNext = paUsers[i].iNext;
3017 else
3018 pPage->iUserHead = paUsers[i].iNext;
3019
3020 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3021 paUsers[i].iNext = pPool->iUserFreeHead;
3022 pPool->iUserFreeHead = i;
3023 return;
3024 }
3025 iPrev = i;
3026 i = paUsers[i].iNext;
3027 }
3028
3029 /* Fatal: didn't find it */
3030 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3031 iUser, iUserTable, pPage->GCPhys));
3032}
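
/*
 * Illustrative sketch (not part of the original source): user records live in
 * one array and unused entries are threaded onto a free list through their
 * iNext fields (iUserFreeHead).  Allocation pops the head of that free list
 * and freeing pushes the record back, exactly as in the routines above.  A
 * standalone miniature with made-up EX* names:
 */
#if 0 /* simplified model, for illustration only */
#include <stdint.h>

#define EX_NIL UINT16_MAX

typedef struct EXUSER { uint16_t iUser, iUserTable, iNext; } EXUSER;

/* Pop a record off the free list; returns EX_NIL when exhausted. */
static uint16_t exampleUserAlloc(EXUSER *paUsers, uint16_t *piFreeHead)
{
    uint16_t i = *piFreeHead;
    if (i != EX_NIL)
        *piFreeHead = paUsers[i].iNext;
    return i;
}

/* Push a record back onto the free list. */
static void exampleUserFree(EXUSER *paUsers, uint16_t *piFreeHead, uint16_t i)
{
    paUsers[i].iUser = EX_NIL;          /* mark unused, like NIL_PGMPOOL_IDX above */
    paUsers[i].iNext = *piFreeHead;
    *piFreeHead = i;
}
#endif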
3033
3034
3035#if 0 /* unused */
3036/**
3037 * Gets the entry size of a shadow table.
3038 *
3039 * @param enmKind The kind of page.
3040 *
3041 * @returns The size of the entry in bytes. That is, 4 or 8.
3042 * @returns If the kind is not for a table, an assertion is raised and 0 is
3043 * returned.
3044 */
3045DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3046{
3047 switch (enmKind)
3048 {
3049 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3050 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3051 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3052 case PGMPOOLKIND_32BIT_PD:
3053 case PGMPOOLKIND_32BIT_PD_PHYS:
3054 return 4;
3055
3056 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3057 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3058 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3059 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3060 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3061 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3062 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3063 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3064 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3065 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3066 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3067 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3068 case PGMPOOLKIND_64BIT_PML4:
3069 case PGMPOOLKIND_PAE_PDPT:
3070 case PGMPOOLKIND_ROOT_NESTED:
3071 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3072 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3073 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3074 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3075 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3076 case PGMPOOLKIND_PAE_PD_PHYS:
3077 case PGMPOOLKIND_PAE_PDPT_PHYS:
3078 return 8;
3079
3080 default:
3081 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3082 }
3083}
3084#endif /* unused */
3085
3086#if 0 /* unused */
3087/**
3088 * Gets the entry size of a guest table.
3089 *
3090 * @param enmKind The kind of page.
3091 *
3092 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3093 * @returns If the kind is not for a table, an assertion is raised and 0 is
3094 * returned.
3095 */
3096DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3097{
3098 switch (enmKind)
3099 {
3100 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3101 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3102 case PGMPOOLKIND_32BIT_PD:
3103 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3104 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3105 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3106 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3107 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3108 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3109 return 4;
3110
3111 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3112 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3113 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3114 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3115 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3116 case PGMPOOLKIND_64BIT_PML4:
3117 case PGMPOOLKIND_PAE_PDPT:
3118 return 8;
3119
3120 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3121 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3122 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3123 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3124 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3125 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3126 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3127 case PGMPOOLKIND_ROOT_NESTED:
3128 case PGMPOOLKIND_PAE_PD_PHYS:
3129 case PGMPOOLKIND_PAE_PDPT_PHYS:
3130 case PGMPOOLKIND_32BIT_PD_PHYS:
3131 /** @todo can we return 0? (nobody is calling this...) */
3132 AssertFailed();
3133 return 0;
3134
3135 default:
3136 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3137 }
3138}
3139#endif /* unused */
3140
3141
3142/**
3143 * Checks one shadow page table entry for a mapping of a physical page.
3144 *
3145 * @returns true / false indicating removal of all relevant PTEs
3146 *
3147 * @param pVM The cross context VM structure.
3148 * @param pPhysPage The guest page in question.
3149 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3150 * @param iShw The shadow page table.
3151 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3152 */
3153static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3154{
3155 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3156 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3157 bool fRet = false;
3158
3159 /*
3160 * Assert sanity.
3161 */
3162 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3163 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3164 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3165
3166 /*
3167 * Then, clear the actual mappings to the page in the shadow PT.
3168 */
3169 switch (pPage->enmKind)
3170 {
3171 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3172 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3173 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3174 {
3175 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3176 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3177 uint32_t u32AndMask = 0;
3178 uint32_t u32OrMask = 0;
3179
3180 if (!fFlushPTEs)
3181 {
3182 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3183 {
3184 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3185 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3186 u32OrMask = X86_PTE_RW;
3187 u32AndMask = UINT32_MAX;
3188 fRet = true;
3189 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3190 break;
3191
3192 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3193 u32OrMask = 0;
3194 u32AndMask = ~X86_PTE_RW;
3195 fRet = true;
3196 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3197 break;
3198 default:
3199 /* (shouldn't be here, will assert below) */
3200 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3201 break;
3202 }
3203 }
3204 else
3205 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3206
3207 /* Update the counter if we're removing references. */
3208 if (!u32AndMask)
3209 {
3210 Assert(pPage->cPresent);
3211 Assert(pPool->cPresent);
3212 pPage->cPresent--;
3213 pPool->cPresent--;
3214 }
3215
3216 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3217 {
3218 X86PTE Pte;
3219
3220 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3221 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3222 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3223 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3224
3225 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3226 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3227 return fRet;
3228 }
3229#ifdef LOG_ENABLED
3230 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3231 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3232 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3233 {
3234 Log(("i=%d cFound=%d\n", i, ++cFound));
3235 }
3236#endif
3237 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3238 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3239 break;
3240 }
3241
3242 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3243 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3244 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3245 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3246 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3247 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3248 {
3249 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3250 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3251 uint64_t u64OrMask = 0;
3252 uint64_t u64AndMask = 0;
3253
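            /* Same scheme as the 32-bit case above: keep the entry and only adjust its R/W bit
               when the handler state allows it, otherwise clear the entry. */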
3254 if (!fFlushPTEs)
3255 {
3256 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3257 {
3258 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3259 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3260 u64OrMask = X86_PTE_RW;
3261 u64AndMask = UINT64_MAX;
3262 fRet = true;
3263 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3264 break;
3265
3266 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3267 u64OrMask = 0;
3268 u64AndMask = ~(uint64_t)X86_PTE_RW;
3269 fRet = true;
3270 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3271 break;
3272
3273 default:
3274 /* (shouldn't be here, will assert below) */
3275 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3276 break;
3277 }
3278 }
3279 else
3280 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3281
3282 /* Update the counter if we're removing references. */
3283 if (!u64AndMask)
3284 {
3285 Assert(pPage->cPresent);
3286 Assert(pPool->cPresent);
3287 pPage->cPresent--;
3288 pPool->cPresent--;
3289 }
3290
3291 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3292 {
3293 X86PTEPAE Pte;
3294
3295 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3296 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3297 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3298 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3299
3300 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3301 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3302 return fRet;
3303 }
3304#ifdef LOG_ENABLED
3305 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3306 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3307 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3308 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3309 Log(("i=%d cFound=%d\n", i, ++cFound));
3310#endif
3311 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3312 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3313 break;
3314 }
3315
3316#ifdef PGM_WITH_LARGE_PAGES
3317 /* Large page case only. */
3318 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3319 {
3320 Assert(pVM->pgm.s.fNestedPaging);
3321
3322 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3323 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3324
3325 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3326 {
3327 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3328 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3329 pPD->a[iPte].u = 0;
3330 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3331
3332 /* Update the counter as we're removing references. */
3333 Assert(pPage->cPresent);
3334 Assert(pPool->cPresent);
3335 pPage->cPresent--;
3336 pPool->cPresent--;
3337
3338 return fRet;
3339 }
3340# ifdef LOG_ENABLED
3341 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3342 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3343 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3344 Log(("i=%d cFound=%d\n", i, ++cFound));
3345# endif
3346 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3347 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3348 break;
3349 }
3350
3351 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3352 case PGMPOOLKIND_PAE_PD_PHYS:
3353 {
3354 Assert(pVM->pgm.s.fNestedPaging);
3355
3356 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3357 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3358
3359 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3360 {
3361 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3362 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3363 pPD->a[iPte].u = 0;
3364 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3365
3366 /* Update the counter as we're removing references. */
3367 Assert(pPage->cPresent);
3368 Assert(pPool->cPresent);
3369 pPage->cPresent--;
3370 pPool->cPresent--;
3371 return fRet;
3372 }
3373# ifdef LOG_ENABLED
3374 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3375 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3376 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3377 Log(("i=%d cFound=%d\n", i, ++cFound));
3378# endif
3379 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3380 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3381 break;
3382 }
3383#endif /* PGM_WITH_LARGE_PAGES */
3384
3385 default:
3386 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3387 }
3388
3389 /* not reached. */
3390#ifndef _MSC_VER
3391 return fRet;
3392#endif
3393}
3394
3395
3396/**
3397 * Scans one shadow page table for mappings of a physical page.
3398 *
3399 * @param pVM The cross context VM structure.
3400 * @param pPhysPage The guest page in question.
3401 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3402 * @param iShw The index of the shadow page table.
3403 */
3404static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3405{
3406 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3407
3408 /* We should only come here when there's only one reference to this physical page. */
3409 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3410
3411 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3412 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3413 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
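    /* Only wipe the tracking data when the PTE was actually removed; a kept (merely
       write-protected) PTE still references the page. */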
3414 if (!fKeptPTEs)
3415 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3416 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3417}
3418
3419
3420/**
3421 * Flushes a list of shadow page tables mapping the same physical page.
3422 *
3423 * @param pVM The cross context VM structure.
3424 * @param pPhysPage The guest page in question.
3425 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3426 * @param iPhysExt The physical cross reference extent list to flush.
3427 */
3428static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3429{
3430 PGM_LOCK_ASSERT_OWNER(pVM);
3431 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3432 bool fKeepList = false;
3433
3434 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3435 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3436
3437 const uint16_t iPhysExtStart = iPhysExt;
3438 PPGMPOOLPHYSEXT pPhysExt;
3439 do
3440 {
3441 Assert(iPhysExt < pPool->cMaxPhysExts);
3442 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3443 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3444 {
3445 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3446 {
3447 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3448 if (!fKeptPTEs)
3449 {
3450 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3451 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3452 }
3453 else
3454 fKeepList = true;
3455 }
3456 }
3457 /* next */
3458 iPhysExt = pPhysExt->iNext;
3459 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3460
3461 if (!fKeepList)
3462 {
3463 /* insert the list into the free list and clear the ram range entry. */
3464 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3465 pPool->iPhysExtFreeHead = iPhysExtStart;
3466 /* Invalidate the tracking data. */
3467 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3468 }
3469
3470 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3471}
3472
3473
3474/**
3475 * Flushes all shadow page table mappings of the given guest page.
3476 *
3477 * This is typically called when the host page backing the guest one has been
3478 * replaced or when the page protection was changed due to a guest access
3479 * caught by the monitoring.
3480 *
3481 * @returns VBox status code.
3482 * @retval VINF_SUCCESS if all references have been successfully cleared.
3483 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3484 * pool cleaning. FF and sync flags are set.
3485 *
3486 * @param pVM The cross context VM structure.
3487 * @param GCPhysPage GC physical address of the page in question
3488 * @param pPhysPage The guest page in question.
3489 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3490 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3491 * flushed; it is NOT touched if this isn't necessary.
3492 * The caller MUST initialize this to @a false.
3493 */
3494int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3495{
3496 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3497 pgmLock(pVM);
3498 int rc = VINF_SUCCESS;
3499
3500#ifdef PGM_WITH_LARGE_PAGES
3501 /* Is this page part of a large page? */
3502 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3503 {
3504 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3505 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3506
3507 /* Fetch the large page base. */
3508 PPGMPAGE pLargePage;
3509 if (GCPhysBase != GCPhysPage)
3510 {
3511 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3512 AssertFatal(pLargePage);
3513 }
3514 else
3515 pLargePage = pPhysPage;
3516
3517 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3518
3519 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3520 {
3521 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3522 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3523 pVM->pgm.s.cLargePagesDisabled++;
3524
3525 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3526 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3527
3528 *pfFlushTLBs = true;
3529 pgmUnlock(pVM);
3530 return rc;
3531 }
3532 }
3533#else
3534 NOREF(GCPhysPage);
3535#endif /* PGM_WITH_LARGE_PAGES */
3536
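    /* The 16-bit tracking word packs a reference count and an index: a count of
       PGMPOOL_TD_CREFS_PHYSEXT means the index points to a chain of physical cross
       reference extents (or is PGMPOOL_TD_IDX_OVERFLOWED when even that overflowed);
       otherwise the index identifies the single shadow page table referencing the page. */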
3537 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3538 if (u16)
3539 {
3540 /*
3541 * The zero page is currently screwing up the tracking and we'll
3542 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3543 * is defined, zero pages won't normally be mapped. Some kind of solution
3544 * will be needed for this problem of course, but it will have to wait...
3545 */
3546 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3547 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3548 rc = VINF_PGM_GCPHYS_ALIASED;
3549 else
3550 {
3551# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 /** @todo we can drop this now. */
3552 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3553 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3554 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3555# endif
3556
3557 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3558 {
3559 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3560 pgmPoolTrackFlushGCPhysPT(pVM,
3561 pPhysPage,
3562 fFlushPTEs,
3563 PGMPOOL_TD_GET_IDX(u16));
3564 }
3565 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3566 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3567 else
3568 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3569 *pfFlushTLBs = true;
3570
3571# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3572 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3573# endif
3574 }
3575 }
3576
3577 if (rc == VINF_PGM_GCPHYS_ALIASED)
3578 {
3579 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3580 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3581 rc = VINF_PGM_SYNC_CR3;
3582 }
3583 pgmUnlock(pVM);
3584 return rc;
3585}
3586
3587
3588/**
3589 * Scans all shadow page tables for mappings of a physical page.
3590 *
3591 * This may be slow, but it's most likely more efficient than cleaning
3592 * out the entire page pool / cache.
3593 *
3594 * @returns VBox status code.
3595 * @retval VINF_SUCCESS if all references have been successfully cleared.
3596 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3597 * a page pool cleaning.
3598 *
3599 * @param pVM The cross context VM structure.
3600 * @param pPhysPage The guest page in question.
3601 */
3602int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3603{
3604 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3605 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3606 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3607 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3608
3609 /*
3610 * There is a limit to what makes sense.
3611 */
3612 if ( pPool->cPresent > 1024
3613 && pVM->cCpus == 1)
3614 {
3615 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3616 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3617 return VINF_PGM_GCPHYS_ALIASED;
3618 }
3619
3620 /*
3621 * Iterate all the pages until we've encountered all that are in use.
3622 * This is a simple but not quite optimal solution.
3623 */
3624 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3625 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3626 unsigned cLeft = pPool->cUsedPages;
3627 unsigned iPage = pPool->cCurPages;
3628 while (--iPage >= PGMPOOL_IDX_FIRST)
3629 {
3630 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3631 if ( pPage->GCPhys != NIL_RTGCPHYS
3632 && pPage->cPresent)
3633 {
3634 switch (pPage->enmKind)
3635 {
3636 /*
3637 * We only care about shadow page tables.
3638 */
3639 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3640 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3641 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3642 {
3643 unsigned cPresent = pPage->cPresent;
3644 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3645 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3646 if (pPT->a[i].n.u1Present)
3647 {
3648 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3649 {
3650 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3651 pPT->a[i].u = 0;
3652
3653 /* Update the counter as we're removing references. */
3654 Assert(pPage->cPresent);
3655 Assert(pPool->cPresent);
3656 pPage->cPresent--;
3657 pPool->cPresent--;
3658 }
3659 if (!--cPresent)
3660 break;
3661 }
3662 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3663 break;
3664 }
3665
3666 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3667 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3668 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3669 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3670 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3671 {
3672 unsigned cPresent = pPage->cPresent;
3673 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3674 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3675 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3676 {
3677 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3678 {
3679 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3680 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3681
3682 /* Update the counter as we're removing references. */
3683 Assert(pPage->cPresent);
3684 Assert(pPool->cPresent);
3685 pPage->cPresent--;
3686 pPool->cPresent--;
3687 }
3688 if (!--cPresent)
3689 break;
3690 }
3691 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3692 break;
3693 }
3694
3695 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3696 {
3697 unsigned cPresent = pPage->cPresent;
3698 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3699 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3700 if (pPT->a[i].n.u1Present)
3701 {
3702 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3703 {
3704 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3705 pPT->a[i].u = 0;
3706
3707 /* Update the counter as we're removing references. */
3708 Assert(pPage->cPresent);
3709 Assert(pPool->cPresent);
3710 pPage->cPresent--;
3711 pPool->cPresent--;
3712 }
3713 if (!--cPresent)
3714 break;
3715 }
3716 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3717 break;
3718 }
3719 }
3720
3721 if (!--cLeft)
3722 break;
3723 }
3724 }
3725
3726 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3727 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3728
3729 /*
3730 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3731 */
3732 if (pPool->cPresent > 1024)
3733 {
3734 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3735 return VINF_PGM_GCPHYS_ALIASED;
3736 }
3737
3738 return VINF_SUCCESS;
3739}
3740
3741
3742/**
3743 * Clears the user entry in a user table.
3744 *
3745 * This is used to remove all references to a page when flushing it.
3746 */
3747static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3748{
3749 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3750 Assert(pUser->iUser < pPool->cCurPages);
3751 uint32_t iUserTable = pUser->iUserTable;
3752
3753 /*
3754 * Map the user page. Ignore references made by fictitious pages.
3755 */
3756 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3757 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3758 union
3759 {
3760 uint64_t *pau64;
3761 uint32_t *pau32;
3762 } u;
3763 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3764 {
3765 Assert(!pUserPage->pvPageR3);
3766 return;
3767 }
3768 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3769
3770
3771 /* Safety precaution in case we change the paging for other modes too in the future. */
3772 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3773
3774#ifdef VBOX_STRICT
3775 /*
3776 * Some sanity checks.
3777 */
3778 switch (pUserPage->enmKind)
3779 {
3780 case PGMPOOLKIND_32BIT_PD:
3781 case PGMPOOLKIND_32BIT_PD_PHYS:
3782 Assert(iUserTable < X86_PG_ENTRIES);
3783 break;
3784 case PGMPOOLKIND_PAE_PDPT:
3785 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3786 case PGMPOOLKIND_PAE_PDPT_PHYS:
3787 Assert(iUserTable < 4);
3788 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3789 break;
3790 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3791 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3792 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3793 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3794 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3795 case PGMPOOLKIND_PAE_PD_PHYS:
3796 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3797 break;
3798 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3799 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3800 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3801 break;
3802 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3803 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3804 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3805 break;
3806 case PGMPOOLKIND_64BIT_PML4:
3807 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3808 /* GCPhys >> PAGE_SHIFT is the index here */
3809 break;
3810 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3811 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3812 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3813 break;
3814
3815 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3816 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3817 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3818 break;
3819
3820 case PGMPOOLKIND_ROOT_NESTED:
3821 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3822 break;
3823
3824 default:
3825 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3826 break;
3827 }
3828#endif /* VBOX_STRICT */
3829
3830 /*
3831 * Clear the entry in the user page.
3832 */
3833 switch (pUserPage->enmKind)
3834 {
3835 /* 32-bit entries */
3836 case PGMPOOLKIND_32BIT_PD:
3837 case PGMPOOLKIND_32BIT_PD_PHYS:
3838 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3839 break;
3840
3841 /* 64-bit entries */
3842 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3843 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3844 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3845 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3846 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3847 case PGMPOOLKIND_PAE_PD_PHYS:
3848 case PGMPOOLKIND_PAE_PDPT_PHYS:
3849 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3850 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3851 case PGMPOOLKIND_64BIT_PML4:
3852 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3853 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3854 case PGMPOOLKIND_PAE_PDPT:
3855 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3856 case PGMPOOLKIND_ROOT_NESTED:
3857 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3858 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3859 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3860 break;
3861
3862 default:
3863 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3864 }
3865 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3866}
3867
3868
3869/**
3870 * Clears all users of a page.
3871 */
3872static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3873{
3874 /*
3875 * Free all the user records.
3876 */
3877 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3878
3879 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3880 uint16_t i = pPage->iUserHead;
3881 while (i != NIL_PGMPOOL_USER_INDEX)
3882 {
3883 /* Clear the entry in the user table. */
3884 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3885
3886 /* Free it. */
3887 const uint16_t iNext = paUsers[i].iNext;
3888 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3889 paUsers[i].iNext = pPool->iUserFreeHead;
3890 pPool->iUserFreeHead = i;
3891
3892 /* Next. */
3893 i = iNext;
3894 }
3895 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3896}
3897
3898
3899/**
3900 * Allocates a new physical cross reference extent.
3901 *
3902 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3903 * @param pVM The cross context VM structure.
3904 * @param piPhysExt Where to store the phys ext index.
3905 */
3906PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3907{
3908 PGM_LOCK_ASSERT_OWNER(pVM);
3909 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3910 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3911 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3912 {
3913 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3914 return NULL;
3915 }
3916 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3917 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3918 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3919 *piPhysExt = iPhysExt;
3920 return pPhysExt;
3921}
3922
3923
3924/**
3925 * Frees a physical cross reference extent.
3926 *
3927 * @param pVM The cross context VM structure.
3928 * @param iPhysExt The extent to free.
3929 */
3930void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3931{
3932 PGM_LOCK_ASSERT_OWNER(pVM);
3933 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3934 Assert(iPhysExt < pPool->cMaxPhysExts);
3935 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3936 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3937 {
3938 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3939 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3940 }
3941 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3942 pPool->iPhysExtFreeHead = iPhysExt;
3943}
3944
3945
3946/**
3947 * Frees a list of physical cross reference extents.
3948 *
3949 * @param pVM The cross context VM structure.
3950 * @param iPhysExt The index of the first extent in the list to free.
3951 */
3952void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3953{
3954 PGM_LOCK_ASSERT_OWNER(pVM);
3955 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3956
3957 const uint16_t iPhysExtStart = iPhysExt;
3958 PPGMPOOLPHYSEXT pPhysExt;
3959 do
3960 {
3961 Assert(iPhysExt < pPool->cMaxPhysExts);
3962 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3963 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3964 {
3965 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3966 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3967 }
3968
3969 /* next */
3970 iPhysExt = pPhysExt->iNext;
3971 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3972
3973 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3974 pPool->iPhysExtFreeHead = iPhysExtStart;
3975}
3976
3977
3978/**
3979 * Insert a reference into a list of physical cross reference extents.
3980 *
3981 * @returns The new tracking data for PGMPAGE.
3982 *
3983 * @param pVM The cross context VM structure.
3984 * @param iPhysExt The physical extent index of the list head.
3985 * @param iShwPT The shadow page table index.
3986 * @param iPte Page table entry
3987 *
3988 */
3989static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3990{
3991 PGM_LOCK_ASSERT_OWNER(pVM);
3992 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3993 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3994
3995 /*
3996 * Special common cases.
3997 */
3998 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3999 {
4000 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4001 paPhysExts[iPhysExt].apte[1] = iPte;
4002 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4003 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4004 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4005 }
4006 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4007 {
4008 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4009 paPhysExts[iPhysExt].apte[2] = iPte;
4010 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4011 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4012 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4013 }
4014 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4015
4016 /*
4017 * General treatment.
4018 */
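    /* Walk the existing extent chain looking for a free slot; cMax bounds the walk so that
       a page referenced by an excessive number of shadow PTs degrades to the overflowed
       state instead of growing an ever longer list. */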
4019 const uint16_t iPhysExtStart = iPhysExt;
4020 unsigned cMax = 15;
4021 for (;;)
4022 {
4023 Assert(iPhysExt < pPool->cMaxPhysExts);
4024 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4025 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4026 {
4027 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4028 paPhysExts[iPhysExt].apte[i] = iPte;
4029 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4030 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4031 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4032 }
4033 if (!--cMax)
4034 {
4035 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4036 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4037 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4038 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4039 }
4040
4041 /* advance */
4042 iPhysExt = paPhysExts[iPhysExt].iNext;
4043 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4044 break;
4045 }
4046
4047 /*
4048 * Add another extent to the list.
4049 */
4050 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4051 if (!pNew)
4052 {
4053 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4054 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4055 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4056 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4057 }
4058 pNew->iNext = iPhysExtStart;
4059 pNew->aidx[0] = iShwPT;
4060 pNew->apte[0] = iPte;
4061 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4062 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4063}
4064
4065
4066/**
4067 * Add a reference to guest physical page where extents are in use.
4068 *
4069 * @returns The new tracking data for PGMPAGE.
4070 *
4071 * @param pVM The cross context VM structure.
4072 * @param pPhysPage Pointer to the aPages entry in the ram range.
4073 * @param u16 The ram range flags (top 16-bits).
4074 * @param iShwPT The shadow page table index.
4075 * @param iPte Page table entry
4076 */
4077uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4078{
4079 pgmLock(pVM);
4080 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4081 {
4082 /*
4083 * Convert to extent list.
4084 */
4085 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4086 uint16_t iPhysExt;
4087 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4088 if (pPhysExt)
4089 {
4090 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4091 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4092 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4093 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4094 pPhysExt->aidx[1] = iShwPT;
4095 pPhysExt->apte[1] = iPte;
4096 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4097 }
4098 else
4099 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4100 }
4101 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4102 {
4103 /*
4104 * Insert into the extent list.
4105 */
4106 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4107 }
4108 else
4109 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4110 pgmUnlock(pVM);
4111 return u16;
4112}
4113
4114
4115/**
4116 * Clear references to guest physical memory.
4117 *
4118 * @param pPool The pool.
4119 * @param pPage The page.
4120 * @param pPhysPage Pointer to the aPages entry in the ram range.
4121 * @param iPte Shadow PTE index
4122 */
4123void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4124{
4125 PVMCC pVM = pPool->CTX_SUFF(pVM);
4126 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4127 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4128
4129 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4130 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4131 {
4132 pgmLock(pVM);
4133
4134 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4135 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
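        /* Walk the extent chain, drop the (pPage->idx, iPte) reference, and if that leaves
           the extent empty, unlink and free it (lonely, head and middle-of-list cases below). */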
4136 do
4137 {
4138 Assert(iPhysExt < pPool->cMaxPhysExts);
4139
4140 /*
4141 * Look for the shadow page and check if it's all freed.
4142 */
4143 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4144 {
4145 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4146 && paPhysExts[iPhysExt].apte[i] == iPte)
4147 {
4148 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4149 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4150
4151 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4152 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4153 {
4154 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4155 pgmUnlock(pVM);
4156 return;
4157 }
4158
4159 /* we can free the node. */
4160 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4161 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4162 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4163 {
4164 /* lonely node */
4165 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4166 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4167 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4168 }
4169 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4170 {
4171 /* head */
4172 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4173 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4174 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4175 }
4176 else
4177 {
4178 /* in list */
4179 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4180 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4181 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4182 }
4183 iPhysExt = iPhysExtNext;
4184 pgmUnlock(pVM);
4185 return;
4186 }
4187 }
4188
4189 /* next */
4190 iPhysExtPrev = iPhysExt;
4191 iPhysExt = paPhysExts[iPhysExt].iNext;
4192 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4193
4194 pgmUnlock(pVM);
4195 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4196 }
4197 else /* nothing to do */
4198 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4199}
4200
4201/**
4202 * Clear references to guest physical memory.
4203 *
4204 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4205 * physical address is assumed to be correct, so the linear search can be
4206 * skipped and we can assert at an earlier point.
4207 *
4208 * @param pPool The pool.
4209 * @param pPage The page.
4210 * @param HCPhys The host physical address corresponding to the guest page.
4211 * @param GCPhys The guest physical address corresponding to HCPhys.
4212 * @param iPte Shadow PTE index
4213 */
4214static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4215{
4216 /*
4217 * Lookup the page and check if it checks out before derefing it.
4218 */
4219 PVMCC pVM = pPool->CTX_SUFF(pVM);
4220 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4221 if (pPhysPage)
4222 {
4223 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4224#ifdef LOG_ENABLED
4225 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4226 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4227#endif
4228 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4229 {
4230 Assert(pPage->cPresent);
4231 Assert(pPool->cPresent);
4232 pPage->cPresent--;
4233 pPool->cPresent--;
4234 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4235 return;
4236 }
4237
4238 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4239 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4240 }
4241 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4242}
4243
4244
4245/**
4246 * Clear references to guest physical memory.
4247 *
4248 * @param pPool The pool.
4249 * @param pPage The page.
4250 * @param HCPhys The host physical address corresponding to the guest page.
4251 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4252 * @param iPte Shadow pte index
4253 */
4254void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4255{
4256 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4257
4258 /*
4259 * Try the hint first.
4260 */
4261 RTHCPHYS HCPhysHinted;
4262 PVMCC pVM = pPool->CTX_SUFF(pVM);
4263 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4264 if (pPhysPage)
4265 {
4266 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4267 Assert(HCPhysHinted);
4268 if (HCPhysHinted == HCPhys)
4269 {
4270 Assert(pPage->cPresent);
4271 Assert(pPool->cPresent);
4272 pPage->cPresent--;
4273 pPool->cPresent--;
4274 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4275 return;
4276 }
4277 }
4278 else
4279 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4280
4281 /*
4282 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4283 */
4284 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4285 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4286 while (pRam)
4287 {
4288 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4289 while (iPage-- > 0)
4290 {
4291 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4292 {
4293 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4294 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4295 Assert(pPage->cPresent);
4296 Assert(pPool->cPresent);
4297 pPage->cPresent--;
4298 pPool->cPresent--;
4299 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4300 return;
4301 }
4302 }
4303 pRam = pRam->CTX_SUFF(pNext);
4304 }
4305
4306 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4307}
4308
4309
4310/**
4311 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4312 *
4313 * @param pPool The pool.
4314 * @param pPage The page.
4315 * @param pShwPT The shadow page table (mapping of the page).
4316 * @param pGstPT The guest page table.
4317 */
4318DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4319{
4320 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4321 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4322 {
4323 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4324 if (pShwPT->a[i].n.u1Present)
4325 {
4326 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4327 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4328 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4329 if (!pPage->cPresent)
4330 break;
4331 }
4332 }
4333}
4334
4335
4336/**
4337 * Clear references to guest physical memory in a PAE / 32-bit page table.
4338 *
4339 * @param pPool The pool.
4340 * @param pPage The page.
4341 * @param pShwPT The shadow page table (mapping of the page).
4342 * @param pGstPT The guest page table (just a half one).
4343 */
4344DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4345{
4346 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4347 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4348 {
4349 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4350 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4351 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4352 {
4353 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4354 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4355 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4356 if (!pPage->cPresent)
4357 break;
4358 }
4359 }
4360}
4361
4362
4363/**
4364 * Clear references to guest physical memory in a PAE / PAE page table.
4365 *
4366 * @param pPool The pool.
4367 * @param pPage The page.
4368 * @param pShwPT The shadow page table (mapping of the page).
4369 * @param pGstPT The guest page table.
4370 */
4371DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4372{
4373 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4374 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4375 {
4376 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4377 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4378 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4379 {
4380 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4381 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4382 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4383 if (!pPage->cPresent)
4384 break;
4385 }
4386 }
4387}
4388
4389
4390/**
4391 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4392 *
4393 * @param pPool The pool.
4394 * @param pPage The page.
4395 * @param pShwPT The shadow page table (mapping of the page).
4396 */
4397DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4398{
4399 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4400 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4401 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4402 {
4403 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4404 if (pShwPT->a[i].n.u1Present)
4405 {
4406 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4407 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4408 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4409 if (!pPage->cPresent)
4410 break;
4411 }
4412 }
4413}
4414
4415
4416/**
4417 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4418 *
4419 * @param pPool The pool.
4420 * @param pPage The page.
4421 * @param pShwPT The shadow page table (mapping of the page).
4422 */
4423DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4424{
4425 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4426 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4427 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4428 {
4429 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4430 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4431 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4432 {
4433 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4434 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4435 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4436 if (!pPage->cPresent)
4437 break;
4438 }
4439 }
4440}
4441
4442
4443/**
4444 * Clear references to shadowed pages in an EPT page table.
4445 *
4446 * @param pPool The pool.
4447 * @param pPage The page.
4448 * @param pShwPT The shadow page table (mapping of the page).
4450 */
4451DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4452{
4453 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4454 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4455 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4456 {
4457 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4458 if (pShwPT->a[i].n.u1Present)
4459 {
4460 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4461 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4462 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4463 if (!pPage->cPresent)
4464 break;
4465 }
4466 }
4467}
4468
4469
4470/**
4471 * Clear references to shadowed pages in a 32-bit page directory.
4472 *
4473 * @param pPool The pool.
4474 * @param pPage The page.
4475 * @param pShwPD The shadow page directory (mapping of the page).
4476 */
4477DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4478{
4479 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4480 {
4481 if ( pShwPD->a[i].n.u1Present
4482 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4483 )
4484 {
4485 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4486 if (pSubPage)
4487 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4488 else
4489 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4490 }
4491 }
4492}
4493
4494
4495/**
4496 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4497 *
4498 * @param pPool The pool.
4499 * @param pPage The page.
4500 * @param pShwPD The shadow page directory (mapping of the page).
4501 */
4502DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4503{
4504 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4505 {
4506 if ( pShwPD->a[i].n.u1Present
4507 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4508 {
4509#ifdef PGM_WITH_LARGE_PAGES
4510 if (pShwPD->a[i].b.u1Size)
4511 {
4512 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4513 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4514 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4515 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4516 i);
4517 }
4518 else
4519#endif
4520 {
4521 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4522 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4523 if (pSubPage)
4524 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4525 else
4526 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4527 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4528 }
4529 }
4530 }
4531}
4532
4533
4534/**
4535 * Clear references to shadowed pages in a PAE page directory pointer table.
4536 *
4537 * @param pPool The pool.
4538 * @param pPage The page.
4539 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4540 */
4541DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4542{
4543 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4544 {
4545 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4546 if ( pShwPDPT->a[i].n.u1Present
4547 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4548 )
4549 {
4550 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4551 if (pSubPage)
4552 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4553 else
4554 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4555 }
4556 }
4557}
4558
4559
4560/**
4561 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4562 *
4563 * @param pPool The pool.
4564 * @param pPage The page.
4565 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4566 */
4567DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4568{
4569 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4570 {
4571 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4572 if (pShwPDPT->a[i].n.u1Present)
4573 {
4574 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4575 if (pSubPage)
4576 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4577 else
4578 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4579 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4580 }
4581 }
4582}
4583
4584
4585/**
4586 * Clear references to shadowed pages in a 64-bit level 4 page table.
4587 *
4588 * @param pPool The pool.
4589 * @param pPage The page.
4590 * @param pShwPML4 The shadow level 4 page table (mapping of the page).
4591 */
4592DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4593{
4594 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4595 {
4596 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4597 if (pShwPML4->a[i].n.u1Present)
4598 {
4599 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4600 if (pSubPage)
4601 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4602 else
4603 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4604 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4605 }
4606 }
4607}
4608
4609
4610/**
4611 * Clear references to shadowed pages in an EPT page directory.
4612 *
4613 * @param pPool The pool.
4614 * @param pPage The page.
4615 * @param pShwPD The shadow page directory (mapping of the page).
4616 */
4617DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4618{
4619 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4620 {
4621 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4622 if (pShwPD->a[i].n.u1Present)
4623 {
4624#ifdef PGM_WITH_LARGE_PAGES
4625 if (pShwPD->a[i].b.u1Size)
4626 {
4627 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4628 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4629 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4630 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4631 i);
4632 }
4633 else
4634#endif
4635 {
4636 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4637 if (pSubPage)
4638 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4639 else
4640 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4641 }
4642 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4643 }
4644 }
4645}
4646
4647
4648/**
4649 * Clear references to shadowed pages in an EPT page directory pointer table.
4650 *
4651 * @param pPool The pool.
4652 * @param pPage The page.
4653 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4654 */
4655DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4656{
4657 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4658 {
4659 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4660 if (pShwPDPT->a[i].n.u1Present)
4661 {
4662 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4663 if (pSubPage)
4664 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4665 else
4666 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4667 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4668 }
4669 }
4670}
4671
4672
4673/**
4674 * Clears all references made by this page.
4675 *
4676 * This includes other shadow pages and GC physical addresses.
4677 *
4678 * @param pPool The pool.
4679 * @param pPage The page.
4680 */
4681static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4682{
4683 /*
4684 * Map the shadow page and take action according to the page kind.
4685 */
4686 PVMCC pVM = pPool->CTX_SUFF(pVM);
4687 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4688 switch (pPage->enmKind)
4689 {
4690 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4691 {
4692 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4693 void *pvGst;
4694 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4695 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4696 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4697 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4698 break;
4699 }
4700
4701 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4702 {
4703 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4704 void *pvGst;
4705 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4706 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4707 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4708 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4709 break;
4710 }
4711
4712 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4713 {
4714 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4715 void *pvGst;
4716 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4717 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4718 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4719 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4720 break;
4721 }
4722
4723 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4724 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4725 {
4726 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4727 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4728 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4729 break;
4730 }
4731
4732 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4733 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4734 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4735 {
4736 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4737 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4738 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4739 break;
4740 }
4741
4742 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4743 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4744 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4745 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4746 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4747 case PGMPOOLKIND_PAE_PD_PHYS:
4748 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4749 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4750 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4751 break;
4752
4753 case PGMPOOLKIND_32BIT_PD_PHYS:
4754 case PGMPOOLKIND_32BIT_PD:
4755 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4756 break;
4757
4758 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4759 case PGMPOOLKIND_PAE_PDPT:
4760 case PGMPOOLKIND_PAE_PDPT_PHYS:
4761 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4762 break;
4763
4764 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4765 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4766 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4767 break;
4768
4769 case PGMPOOLKIND_64BIT_PML4:
4770 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4771 break;
4772
4773 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4774 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4775 break;
4776
4777 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4778 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4779 break;
4780
4781 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4782 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4783 break;
4784
4785 default:
4786 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4787 }
4788
4789 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4790 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4791 ASMMemZeroPage(pvShw);
4792 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4793 pPage->fZeroed = true;
4794 Assert(!pPage->cPresent);
4795 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4796}
4797
4798
4799/**
4800 * Flushes a pool page.
4801 *
4802 * This moves the page to the free list after removing all user references to it.
4803 *
4804 * @returns VBox status code.
4805 * @retval VINF_SUCCESS on success.
4806 * @param pPool The pool.
4807 * @param pPage The shadow page.
4808 * @param fFlush Flush the TLBS when required (should only be false in very specific use cases!!)
4809 */
4810int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4811{
4812 PVMCC pVM = pPool->CTX_SUFF(pVM);
4813 bool fFlushRequired = false;
4814
4815 int rc = VINF_SUCCESS;
4816 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4817 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4818 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4819
4820 /*
4821 * Reject any attempts at flushing any of the special root pages (shall
4822 * not happen).
4823 */
4824 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4825 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4826 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4827 VINF_SUCCESS);
4828
4829 pgmLock(pVM);
4830
4831 /*
4832 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4833 */
4834 if (pgmPoolIsPageLocked(pPage))
4835 {
4836 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4837 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4838 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4839 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4840 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4841 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4842 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4843 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4844 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4845 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4846 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4847 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4848 pgmUnlock(pVM);
4849 return VINF_SUCCESS;
4850 }
4851
4852#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4853 /* Start a subset so we won't run out of mapping space. */
4854 PVMCPU pVCpu = VMMGetCpu(pVM);
4855 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4856#endif
4857
4858 /*
4859 * Mark the page as being in need of an ASMMemZeroPage().
4860 */
4861 pPage->fZeroed = false;
4862
4863#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
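    /* If the page is still in the optimized dirty-page tracking, let that code flush
       its pending state first before we tear the page down. */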
4864 if (pPage->fDirty)
4865 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4866#endif
4867
4868 /* If there are any users of this table, then we *must* issue a TLB flush on all VCPUs. */
4869 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4870 fFlushRequired = true;
4871
4872 /*
4873 * Clear the page.
4874 */
4875 pgmPoolTrackClearPageUsers(pPool, pPage);
4876 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4877 pgmPoolTrackDeref(pPool, pPage);
4878 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4879
4880 /*
4881 * Flush it from the cache.
4882 */
4883 pgmPoolCacheFlushPage(pPool, pPage);
4884
4885#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4886 /* Heavy stuff done. */
4887 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4888#endif
4889
4890 /*
4891 * Deregister the monitoring.
4892 */
4893 if (pPage->fMonitored)
4894 rc = pgmPoolMonitorFlush(pPool, pPage);
4895
4896 /*
4897 * Free the page.
4898 */
4899 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4900 pPage->iNext = pPool->iFreeHead;
4901 pPool->iFreeHead = pPage->idx;
4902 pPage->enmKind = PGMPOOLKIND_FREE;
4903 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4904 pPage->GCPhys = NIL_RTGCPHYS;
4905 pPage->fReusedFlushPending = false;
4906
4907 pPool->cUsedPages--;
4908
4909 /* Flush the TLBs of all VCPUs if required. */
4910 if ( fFlushRequired
4911 && fFlush)
4912 {
4913 PGM_INVL_ALL_VCPU_TLBS(pVM);
4914 }
4915
4916 pgmUnlock(pVM);
4917 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4918 return rc;
4919}
4920
4921
4922/**
4923 * Frees a usage of a pool page.
4924 *
4925 * The caller is responsible for updating the user table so that it no longer
4926 * references the shadow page.
4927 *
4928 * @param pPool The pool.
4929 * @param pPage The shadow page.
4930 * @param iUser The shadow page pool index of the user table.
4931 * NIL_PGMPOOL_IDX for root pages.
4932 * @param iUserTable The index into the user table (shadowed). Ignored if
4933 * root page.
4934 */
4935void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4936{
4937 PVMCC pVM = pPool->CTX_SUFF(pVM);
4938
4939 STAM_PROFILE_START(&pPool->StatFree, a);
4940 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4941 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4942 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
4943
4944 pgmLock(pVM);
4945 if (iUser != NIL_PGMPOOL_IDX)
4946 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4947 if (!pPage->fCached)
4948 pgmPoolFlushPage(pPool, pPage);
4949 pgmUnlock(pVM);
4950 STAM_PROFILE_STOP(&pPool->StatFree, a);
4951}
4952
4953
4954/**
4955 * Makes sure at least one page is free, growing the pool or evicting a cached page as needed.
4956 *
4957 * @returns VBox status code.
4958 * @retval VINF_SUCCESS on success.
4959 *
4960 * @param pPool The pool.
4961 * @param enmKind Page table kind
4962 * @param iUser The user of the page.
4963 */
4964static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4965{
4966 PVMCC pVM = pPool->CTX_SUFF(pVM);
4967 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
4968 NOREF(enmKind);
4969
4970 /*
4971 * If the pool isn't fully grown yet, expand it.
4972 */
4973 if (pPool->cCurPages < pPool->cMaxPages)
4974 {
4975 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
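    /* Growing the pool needs ring-3 memory allocations; from ring-0 / raw-mode context
       the request is therefore handed over to ring-3 via the VMM call-ring-3 interface. */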
4976#ifdef IN_RING3
4977 int rc = PGMR3PoolGrow(pVM);
4978#else
4979 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4980#endif
4981 if (RT_FAILURE(rc))
4982 return rc;
4983 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4984 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4985 return VINF_SUCCESS;
4986 }
4987
4988 /*
4989 * Free one cached page.
4990 */
4991 return pgmPoolCacheFreeOne(pPool, iUser);
4992}
4993
4994
4995/**
4996 * Allocates a page from the pool.
4997 *
4998 * This page may actually be a cached page and not in need of any processing
4999 * on the caller's part.
5000 *
5001 * @returns VBox status code.
5002 * @retval VINF_SUCCESS if a NEW page was allocated.
5003 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5004 *
5005 * @param pVM The cross context VM structure.
5006 * @param GCPhys The GC physical address of the page we're going to shadow.
5007 * For 4MB and 2MB PD entries, it's the first address the
5008 * shadow PT is covering.
5009 * @param enmKind The kind of mapping.
5010 * @param enmAccess Access type for the mapping (only relevant for big pages)
5011 * @param fA20Enabled Whether the A20 gate is enabled or not.
5012 * @param iUser The shadow page pool index of the user table. Root
5013 * pages should pass NIL_PGMPOOL_IDX.
5014 * @param iUserTable The index into the user table (shadowed). Ignored for
5015 * root pages (iUser == NIL_PGMPOOL_IDX).
5016 * @param fLockPage Lock the page
5017 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5018 */
5019int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5020 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5021{
5022 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5023 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5024 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5025 *ppPage = NULL;
5026 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5027 * (TRPMR3SyncIDT) because of FF priority. Try to fix that?
5028 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5029
5030 pgmLock(pVM);
5031
5032 if (pPool->fCacheEnabled)
5033 {
5034 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5035 if (RT_SUCCESS(rc2))
5036 {
5037 if (fLockPage)
5038 pgmPoolLockPage(pPool, *ppPage);
5039 pgmUnlock(pVM);
5040 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5041 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5042 return rc2;
5043 }
5044 }
5045
5046 /*
5047 * Allocate a new one.
5048 */
5049 int rc = VINF_SUCCESS;
5050 uint16_t iNew = pPool->iFreeHead;
5051 if (iNew == NIL_PGMPOOL_IDX)
5052 {
5053 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5054 if (RT_FAILURE(rc))
5055 {
5056 pgmUnlock(pVM);
5057 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5058 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5059 return rc;
5060 }
5061 iNew = pPool->iFreeHead;
5062 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5063 }
5064
5065 /* unlink the free head */
5066 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5067 pPool->iFreeHead = pPage->iNext;
5068 pPage->iNext = NIL_PGMPOOL_IDX;
5069
5070 /*
5071 * Initialize it.
5072 */
5073 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5074 pPage->enmKind = enmKind;
5075 pPage->enmAccess = enmAccess;
5076 pPage->GCPhys = GCPhys;
5077 pPage->fA20Enabled = fA20Enabled;
5078 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5079 pPage->fMonitored = false;
5080 pPage->fCached = false;
5081 pPage->fDirty = false;
5082 pPage->fReusedFlushPending = false;
5083 pPage->cModifications = 0;
5084 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5085 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5086 pPage->cPresent = 0;
5087 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5088 pPage->idxDirtyEntry = 0;
5089 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5090 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5091 pPage->cLastAccessHandler = 0;
5092 pPage->cLocked = 0;
5093# ifdef VBOX_STRICT
5094 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5095# endif
5096
5097 /*
5098 * Insert into the tracking and cache. If this fails, free the page.
5099 */
5100 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5101 if (RT_FAILURE(rc3))
5102 {
5103 pPool->cUsedPages--;
5104 pPage->enmKind = PGMPOOLKIND_FREE;
5105 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5106 pPage->GCPhys = NIL_RTGCPHYS;
5107 pPage->iNext = pPool->iFreeHead;
5108 pPool->iFreeHead = pPage->idx;
5109 pgmUnlock(pVM);
5110 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5111 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5112 return rc3;
5113 }
5114
5115 /*
5116 * Commit the allocation, clear the page and return.
5117 */
5118#ifdef VBOX_WITH_STATISTICS
5119 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5120 pPool->cUsedPagesHigh = pPool->cUsedPages;
5121#endif
5122
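    /* Note: pages released through pgmPoolFlushPage() come back zeroed (fZeroed set),
       so this zeroing typically only hits pages the pool has just grown. */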
5123 if (!pPage->fZeroed)
5124 {
5125 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5126 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5127 ASMMemZeroPage(pv);
5128 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5129 }
5130
5131 *ppPage = pPage;
5132 if (fLockPage)
5133 pgmPoolLockPage(pPool, pPage);
5134 pgmUnlock(pVM);
5135 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5136 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5137 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5138 return rc;
5139}
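
/* Illustrative usage sketch (hypothetical caller; the local names GCPhysPt, pShwPde,
 * iPdShw, fA20Enabled and pShwPage are invented for this example). A shadow paging
 * sync path would typically allocate its shadow page table roughly like this, treating
 * VINF_PGM_CACHED_PAGE like VINF_SUCCESS except that a cached page needs no further
 * initialization of its contents:
 *
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysPt, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
 *                           fA20Enabled, pShwPde->idx, iPdShw, false, &pShwPage);
 *     if (rc == VINF_SUCCESS)
 *         ;  // brand new page: fill in the shadow PT entries
 *     else if (rc == VINF_PGM_CACHED_PAGE)
 *         ;  // cached page: reuse as-is
 */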
5140
5141
5142/**
5143 * Frees a usage of a pool page.
5144 *
5145 * @param pVM The cross context VM structure.
5146 * @param HCPhys The HC physical address of the shadow page.
5147 * @param iUser The shadow page pool index of the user table.
5148 * NIL_PGMPOOL_IDX if root page.
5149 * @param iUserTable The index into the user table (shadowed). Ignored if
5150 * root page.
5151 */
5152void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5153{
5154 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5155 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5156 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5157}
5158
5159
5160/**
5161 * Internal worker for finding an 'in-use' shadow page given its physical address.
5162 *
5163 * @returns Pointer to the shadow page structure.
5164 * @param pPool The pool.
5165 * @param HCPhys The HC physical address of the shadow page.
5166 */
5167PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5168{
5169 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5170
5171 /*
5172 * Look up the page.
5173 */
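    /* The AVL tree is keyed on the page-aligned host physical address of the shadow page,
       so strip any low offset/flag bits the caller may have passed along in HCPhys. */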
5174 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5175
5176 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5177 return pPage;
5178}
5179
5180
5181/**
5182 * Internal worker for finding a page for debugging purposes, no assertions.
5183 *
5184 * @returns Pointer to the shadow page structure. NULL if not found.
5185 * @param pPool The pool.
5186 * @param HCPhys The HC physical address of the shadow page.
5187 */
5188PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5189{
5190 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5191 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5192}
5193
5194#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5195
5196/**
5197 * Flush the specified page if present
5198 *
5199 * @param pVM The cross context VM structure.
5200 * @param GCPhys Guest physical address of the page to flush
5201 */
5202void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5203{
5204 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5205
5206 VM_ASSERT_EMT(pVM);
5207
5208 /*
5209 * Look up the GCPhys in the hash.
5210 */
5211 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5212 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5213 if (i == NIL_PGMPOOL_IDX)
5214 return;
5215
5216 do
5217 {
5218 PPGMPOOLPAGE pPage = &pPool->aPages[i];
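        /* Unsigned compare: this only matches when pPage->GCPhys lies within the (page-aligned)
           GCPhys page; values below GCPhys wrap around and fail the check. */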
5219 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5220 {
5221 switch (pPage->enmKind)
5222 {
5223 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5224 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5225 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5226 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5227 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5228 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5229 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5230 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5231 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5232 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5233 case PGMPOOLKIND_64BIT_PML4:
5234 case PGMPOOLKIND_32BIT_PD:
5235 case PGMPOOLKIND_PAE_PDPT:
5236 {
5237 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5238# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5239 if (pPage->fDirty)
5240 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5241 else
5242# endif
5243 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5244 Assert(!pgmPoolIsPageLocked(pPage));
5245 pgmPoolMonitorChainFlush(pPool, pPage);
5246 return;
5247 }
5248
5249 /* ignore, no monitoring. */
5250 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5251 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5252 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5253 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5254 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5255 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5256 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5257 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5258 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5259 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5260 case PGMPOOLKIND_ROOT_NESTED:
5261 case PGMPOOLKIND_PAE_PD_PHYS:
5262 case PGMPOOLKIND_PAE_PDPT_PHYS:
5263 case PGMPOOLKIND_32BIT_PD_PHYS:
5264 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5265 break;
5266
5267 default:
5268 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5269 }
5270 }
5271
5272 /* next */
5273 i = pPage->iNext;
5274 } while (i != NIL_PGMPOOL_IDX);
5275 return;
5276}
5277
5278
5279/**
5280 * Resets a CPU on hot plugging.
5281 *
5282 * @param pVM The cross context VM structure.
5283 * @param pVCpu The cross context virtual CPU structure.
5284 */
5285void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5286{
5287 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5288
5289 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5290 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5291 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5292}
5293
5294
5295/**
5296 * Flushes the entire cache.
5297 *
5298 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5299 * this and will execute the CR3 flush.
5300 *
5301 * @param pVM The cross context VM structure.
5302 */
5303void pgmR3PoolReset(PVM pVM)
5304{
5305 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5306
5307 PGM_LOCK_ASSERT_OWNER(pVM);
5308 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5309 LogFlow(("pgmR3PoolReset:\n"));
5310
5311 /*
5312 * If there are no pages in the pool, there is nothing to do.
5313 */
5314 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5315 {
5316 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5317 return;
5318 }
5319
5320 /*
5321 * Exit the shadow mode since we're going to clear everything,
5322 * including the root page.
5323 */
5324 VMCC_FOR_EACH_VMCPU(pVM)
5325 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5326 VMCC_FOR_EACH_VMCPU_END(pVM);
5327
5328
5329 /*
5330 * Nuke the free list and reinsert all pages into it.
5331 */
5332 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5333 {
5334 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5335
5336 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5337 if (pPage->fMonitored)
5338 pgmPoolMonitorFlush(pPool, pPage);
5339 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5340 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5341 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5342 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5343 pPage->GCPhys = NIL_RTGCPHYS;
5344 pPage->enmKind = PGMPOOLKIND_FREE;
5345 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5346 Assert(pPage->idx == i);
5347 pPage->iNext = i + 1;
5348 pPage->fA20Enabled = true;
5349 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5350 pPage->fSeenNonGlobal = false;
5351 pPage->fMonitored = false;
5352 pPage->fDirty = false;
5353 pPage->fCached = false;
5354 pPage->fReusedFlushPending = false;
5355 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5356 pPage->cPresent = 0;
5357 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5358 pPage->cModifications = 0;
5359 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5360 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5361 pPage->idxDirtyEntry = 0;
5362 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5363 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5364 pPage->cLastAccessHandler = 0;
5365 pPage->cLocked = 0;
5366# ifdef VBOX_STRICT
5367 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5368# endif
5369 }
5370 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5371 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5372 pPool->cUsedPages = 0;
5373
5374 /*
5375 * Zap and reinitialize the user records.
5376 */
5377 pPool->cPresent = 0;
5378 pPool->iUserFreeHead = 0;
5379 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5380 const unsigned cMaxUsers = pPool->cMaxUsers;
5381 for (unsigned i = 0; i < cMaxUsers; i++)
5382 {
5383 paUsers[i].iNext = i + 1;
5384 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5385 paUsers[i].iUserTable = 0xfffffffe;
5386 }
5387 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5388
5389 /*
5390 * Clear all the GCPhys links and rebuild the phys ext free list.
5391 */
5392 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5393 pRam;
5394 pRam = pRam->CTX_SUFF(pNext))
5395 {
5396 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5397 while (iPage-- > 0)
5398 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5399 }
5400
5401 pPool->iPhysExtFreeHead = 0;
5402 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5403 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5404 for (unsigned i = 0; i < cMaxPhysExts; i++)
5405 {
5406 paPhysExts[i].iNext = i + 1;
5407 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5408 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5409 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5410 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5411 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5412 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5413 }
5414 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5415
5416 /*
5417 * Just zap the modified list.
5418 */
5419 pPool->cModifiedPages = 0;
5420 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5421
5422 /*
5423 * Clear the GCPhys hash and the age list.
5424 */
5425 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5426 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5427 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5428 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5429
5430# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5431 /* Clear all dirty pages. */
5432 pPool->idxFreeDirtyPage = 0;
5433 pPool->cDirtyPages = 0;
5434 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5435 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5436# endif
5437
5438 /*
5439 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5440 */
5441 VMCC_FOR_EACH_VMCPU(pVM)
5442 {
5443 /*
5444 * Re-enter the shadowing mode and assert Sync CR3 FF.
5445 */
5446 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5447 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5448 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5449 }
5450 VMCC_FOR_EACH_VMCPU_END(pVM);
5451
5452 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5453}
5454
5455#endif /* IN_RING3 */
5456
5457#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5458/**
5459 * Stringifies a PGMPOOLKIND value.
5460 */
5461static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5462{
5463 switch ((PGMPOOLKIND)enmKind)
5464 {
5465 case PGMPOOLKIND_INVALID:
5466 return "PGMPOOLKIND_INVALID";
5467 case PGMPOOLKIND_FREE:
5468 return "PGMPOOLKIND_FREE";
5469 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5470 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5471 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5472 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5473 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5474 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5475 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5476 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5477 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5478 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5479 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5480 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5481 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5482 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5483 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5484 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5485 case PGMPOOLKIND_32BIT_PD:
5486 return "PGMPOOLKIND_32BIT_PD";
5487 case PGMPOOLKIND_32BIT_PD_PHYS:
5488 return "PGMPOOLKIND_32BIT_PD_PHYS";
5489 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5490 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5491 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5492 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5493 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5494 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5495 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5496 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5497 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5498 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5499 case PGMPOOLKIND_PAE_PD_PHYS:
5500 return "PGMPOOLKIND_PAE_PD_PHYS";
5501 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5502 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5503 case PGMPOOLKIND_PAE_PDPT:
5504 return "PGMPOOLKIND_PAE_PDPT";
5505 case PGMPOOLKIND_PAE_PDPT_PHYS:
5506 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5507 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5508 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5509 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5510 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5511 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5512 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5513 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5514 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5515 case PGMPOOLKIND_64BIT_PML4:
5516 return "PGMPOOLKIND_64BIT_PML4";
5517 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5518 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5519 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5520 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5521 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5522 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5523 case PGMPOOLKIND_ROOT_NESTED:
5524 return "PGMPOOLKIND_ROOT_NESTED";
5525 }
5526 return "Unknown kind!";
5527}
5528#endif /* LOG_ENABLED || VBOX_STRICT */
5529