VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@82896

Last change on this file was r82896 (2020-01-28), checked in by vboxsync:

PGMPool: PGMR0PoolGrow should return failure when allocating the first batch of pages fails. It and the caller PGMR3PoolGrow should report the incident to the release log. bugref:9627

1/* $Id: PGMAllPool.cpp 82896 2020-01-28 21:43:45Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#include "PGMInternal.h"
28#include <VBox/vmm/vmcc.h>
29#include "PGMInline.h"
30#include <VBox/disopcode.h>
31#include <VBox/vmm/hm_vmx.h>
32
33#include <VBox/log.h>
34#include <VBox/err.h>
35#include <iprt/asm.h>
36#include <iprt/asm-amd64-x86.h>
37#include <iprt/string.h>
38
39
40/*********************************************************************************************************************************
41* Internal Functions *
42*********************************************************************************************************************************/
43RT_C_DECLS_BEGIN
44#if 0 /* unused */
45DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
46DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
47#endif /* unused */
48static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
49static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
51static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
53static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
54#endif
55#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
56static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
57#endif
58
59int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
60PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
61void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
62void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
63
64RT_C_DECLS_END
65
66
67#if 0 /* unused */
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86#endif /* unused */
87
88
89/**
90 * Flushes a chain of pages sharing the same access monitor.
91 *
92 * @param pPool The pool.
93 * @param pPage A page in the chain.
94 */
95void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
96{
97 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
98
99 /*
100 * Find the list head.
101 */
102 uint16_t idx = pPage->idx;
103 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
104 {
105 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
106 {
107 idx = pPage->iMonitoredPrev;
108 Assert(idx != pPage->idx);
109 pPage = &pPool->aPages[idx];
110 }
111 }
112
113 /*
114 * Iterate the list flushing each shadow page.
115 */
116 for (;;)
117 {
118 idx = pPage->iMonitoredNext;
119 Assert(idx != pPage->idx);
120 if (pPage->idx >= PGMPOOL_IDX_FIRST)
121 {
122 int rc2 = pgmPoolFlushPage(pPool, pPage);
123 AssertRC(rc2);
124 }
125 /* next */
126 if (idx == NIL_PGMPOOL_IDX)
127 break;
128 pPage = &pPool->aPages[idx];
129 }
130}
131
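
Editor's note: the monitor chain flushed above is a doubly linked list threaded through the pool's page array by 16-bit index rather than by pointer. The stand-alone sketch below (all names such as SKETCHPAGE and SketchChainVisit are invented for illustration and simplify away the PGMPOOL_IDX_FIRST filtering) shows the same walk: back up to the list head first, then visit each member, reading the next index before the visitor may unlink the current page.

#include <stdint.h>

#define SKETCH_NIL_IDX UINT16_MAX              /* stand-in for NIL_PGMPOOL_IDX */

typedef struct SKETCHPAGE
{
    uint16_t iMonitoredPrev;                   /* previous page sharing the monitor, or NIL */
    uint16_t iMonitoredNext;                   /* next page sharing the monitor, or NIL */
} SKETCHPAGE;

/* Walk to the head of the chain, then visit every member exactly once. */
static void SketchChainVisit(SKETCHPAGE *paPages, SKETCHPAGE *pPage,
                             void (*pfnVisit)(SKETCHPAGE *pPage))
{
    while (pPage->iMonitoredPrev != SKETCH_NIL_IDX)
        pPage = &paPages[pPage->iMonitoredPrev];      /* find the list head */
    for (;;)
    {
        uint16_t idxNext = pPage->iMonitoredNext;     /* read before the visitor may unlink */
        pfnVisit(pPage);
        if (idxNext == SKETCH_NIL_IDX)
            break;
        pPage = &paPages[idxNext];
    }
}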
132
133/**
134 * Wrapper for getting the current context pointer to the entry being modified.
135 *
136 * @returns VBox status code suitable for scheduling.
137 * @param pVM The cross context VM structure.
138 * @param pvDst Destination address
139 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
140 * on the context (e.g. \#PF in R0 & RC).
141 * @param GCPhysSrc The source guest physical address.
142 * @param cb Size of data to read
143 */
144DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
145{
146#if defined(IN_RING3)
147 NOREF(pVM); NOREF(GCPhysSrc);
148 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
149 return VINF_SUCCESS;
150#else
151 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
152 NOREF(pvSrc);
153 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
154#endif
155}
156
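
Editor's note: the read helper above fetches the whole, naturally aligned guest entry even when the faulting write only touched part of it, by rounding the source address down to the entry size. A minimal stand-alone sketch of that mask trick, assuming (as the 4- and 8-byte entries here guarantee) that the size is a power of two; the function name is invented:

#include <assert.h>
#include <stdint.h>

/* Round an address down to the start of the cb-byte entry containing it. */
static uint64_t SketchAlignDownToEntry(uint64_t uAddr, uint64_t cb)
{
    assert(cb != 0 && (cb & (cb - 1)) == 0);   /* cb must be a power of two */
    return uAddr & ~(cb - 1);
}
/* e.g. SketchAlignDownToEntry(0x1006, 8) == 0x1000, so a 2-byte write at
   offset 6 still fetches the full 8-byte PAE entry starting at 0x1000. */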
157
158/**
159 * Process shadow entries before they are changed by the guest.
160 *
161 * For PT entries we will clear them. For PD entries, we'll simply check
162 * for mapping conflicts and set the SyncCR3 FF if found.
163 *
164 * @param pVCpu The cross context virtual CPU structure.
165 * @param pPool The pool.
166 * @param pPage The head page.
167 * @param GCPhysFault The guest physical fault address.
168 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
169 * depending on the context (e.g. \#PF in R0 & RC).
170 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
171 */
172static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
173 void const *pvAddress, unsigned cbWrite)
174{
175 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
176 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
177 PVMCC pVM = pPool->CTX_SUFF(pVM);
178 NOREF(pVCpu);
179
180 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
181 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
182
183 for (;;)
184 {
185 union
186 {
187 void *pv;
188 PX86PT pPT;
189 PPGMSHWPTPAE pPTPae;
190 PX86PD pPD;
191 PX86PDPAE pPDPae;
192 PX86PDPT pPDPT;
193 PX86PML4 pPML4;
194 } uShw;
195
196 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
197 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
198
199 uShw.pv = NULL;
200 switch (pPage->enmKind)
201 {
202 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
203 {
204 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
205 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
206 const unsigned iShw = off / sizeof(X86PTE);
207 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
208 if (uShw.pPT->a[iShw].n.u1Present)
209 {
210 X86PTE GstPte;
211
212 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
213 AssertRC(rc);
214 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
215 pgmPoolTracDerefGCPhysHint(pPool, pPage,
216 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
217 GstPte.u & X86_PTE_PG_MASK,
218 iShw);
219 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
220 }
221 break;
222 }
223
224 /* page/2 sized */
225 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
226 {
227 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
228 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
229 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
230 {
231 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
232 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
233 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
234 {
235 X86PTE GstPte;
236 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
237 AssertRC(rc);
238
239 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
240 pgmPoolTracDerefGCPhysHint(pPool, pPage,
241 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
242 GstPte.u & X86_PTE_PG_MASK,
243 iShw);
244 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
245 }
246 }
247 break;
248 }
249
250 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
251 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
252 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
253 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
254 {
255 unsigned iGst = off / sizeof(X86PDE);
256 unsigned iShwPdpt = iGst / 256;
257 unsigned iShw = (iGst % 256) * 2;
258 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
259
260 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
261 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
262 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
263 {
264 for (unsigned i = 0; i < 2; i++)
265 {
266 if (uShw.pPDPae->a[iShw+i].n.u1Present)
267 {
268 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
269 pgmPoolFree(pVM,
270 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
271 pPage->idx,
272 iShw + i);
273 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
274 }
275
276 /* paranoia / a bit assumptive. */
277 if ( (off & 3)
278 && (off & 3) + cbWrite > 4)
279 {
280 const unsigned iShw2 = iShw + 2 + i;
281 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
282 {
283 if (uShw.pPDPae->a[iShw2].n.u1Present)
284 {
285 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
286 pgmPoolFree(pVM,
287 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
288 pPage->idx,
289 iShw2);
290 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
291 }
292 }
293 }
294 }
295 }
296 break;
297 }
298
299 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
300 {
301 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
302 const unsigned iShw = off / sizeof(X86PTEPAE);
303 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
304 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
305 {
306 X86PTEPAE GstPte;
307 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
308 AssertRC(rc);
309
310 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
311 pgmPoolTracDerefGCPhysHint(pPool, pPage,
312 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
313 GstPte.u & X86_PTE_PAE_PG_MASK,
314 iShw);
315 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
316 }
317
318 /* paranoia / a bit assumptive. */
319 if ( (off & 7)
320 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
321 {
322 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
323 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
324
325 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
326 {
327 X86PTEPAE GstPte;
328 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
329 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
330 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
331 AssertRC(rc);
332 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
333 pgmPoolTracDerefGCPhysHint(pPool, pPage,
334 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
335 GstPte.u & X86_PTE_PAE_PG_MASK,
336 iShw2);
337 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
338 }
339 }
340 break;
341 }
342
343 case PGMPOOLKIND_32BIT_PD:
344 {
345 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
346 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
347
348 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
349 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
350 if (uShw.pPD->a[iShw].n.u1Present)
351 {
352 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
353 pgmPoolFree(pVM,
354 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
355 pPage->idx,
356 iShw);
357 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
358 }
359 /* paranoia / a bit assumptive. */
360 if ( (off & 3)
361 && (off & 3) + cbWrite > sizeof(X86PTE))
362 {
363 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
364 if ( iShw2 != iShw
365 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
366 {
367 if (uShw.pPD->a[iShw2].n.u1Present)
368 {
369 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
370 pgmPoolFree(pVM,
371 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
372 pPage->idx,
373 iShw2);
374 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
375 }
376 }
377 }
378#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
379 if ( uShw.pPD->a[iShw].n.u1Present
380 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
381 {
382 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
383 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
384 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
385 }
386#endif
387 break;
388 }
389
390 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
391 {
392 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
393 const unsigned iShw = off / sizeof(X86PDEPAE);
394 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
395
396 /*
397 * Causes trouble when the guest uses a PDE to refer to the whole page table level
398 * structure. (Invalidate here; faults later on when it tries to change the page
399 * table entries -> recheck; probably only applies to the RC case.)
400 */
401 if (uShw.pPDPae->a[iShw].n.u1Present)
402 {
403 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
404 pgmPoolFree(pVM,
405 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
406 pPage->idx,
407 iShw);
408 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
409 }
410
411 /* paranoia / a bit assumptive. */
412 if ( (off & 7)
413 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
414 {
415 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
416 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
417
418 if (uShw.pPDPae->a[iShw2].n.u1Present)
419 {
420 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
421 pgmPoolFree(pVM,
422 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
423 pPage->idx,
424 iShw2);
425 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
426 }
427 }
428 break;
429 }
430
431 case PGMPOOLKIND_PAE_PDPT:
432 {
433 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
434 /*
435 * Hopefully this doesn't happen very often:
436 * - touching unused parts of the page
437 * - messing with the bits of pd pointers without changing the physical address
438 */
439 /* PDPT roots are not page aligned; 32 byte only! */
440 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
441
442 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
443 const unsigned iShw = offPdpt / sizeof(X86PDPE);
444 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
445 {
446 if (uShw.pPDPT->a[iShw].n.u1Present)
447 {
448 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
449 pgmPoolFree(pVM,
450 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
451 pPage->idx,
452 iShw);
453 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
454 }
455
456 /* paranoia / a bit assumptive. */
457 if ( (offPdpt & 7)
458 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
459 {
460 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
461 if ( iShw2 != iShw
462 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
463 {
464 if (uShw.pPDPT->a[iShw2].n.u1Present)
465 {
466 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
467 pgmPoolFree(pVM,
468 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
469 pPage->idx,
470 iShw2);
471 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
472 }
473 }
474 }
475 }
476 break;
477 }
478
479 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
480 {
481 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
482 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
483 const unsigned iShw = off / sizeof(X86PDEPAE);
484 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
485 if (uShw.pPDPae->a[iShw].n.u1Present)
486 {
487 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
488 pgmPoolFree(pVM,
489 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
490 pPage->idx,
491 iShw);
492 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
493 }
494 /* paranoia / a bit assumptive. */
495 if ( (off & 7)
496 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
497 {
498 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
499 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
500
501 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
502 if (uShw.pPDPae->a[iShw2].n.u1Present)
503 {
504 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
505 pgmPoolFree(pVM,
506 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
507 pPage->idx,
508 iShw2);
509 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
510 }
511 }
512 break;
513 }
514
515 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
516 {
517 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
518 /*
519 * Hopefully this doesn't happen very often:
520 * - messing with the bits of pd pointers without changing the physical address
521 */
522 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
523 const unsigned iShw = off / sizeof(X86PDPE);
524 if (uShw.pPDPT->a[iShw].n.u1Present)
525 {
526 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
527 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
528 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
529 }
530 /* paranoia / a bit assumptive. */
531 if ( (off & 7)
532 && (off & 7) + cbWrite > sizeof(X86PDPE))
533 {
534 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
535 if (uShw.pPDPT->a[iShw2].n.u1Present)
536 {
537 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
538 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
539 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
540 }
541 }
542 break;
543 }
544
545 case PGMPOOLKIND_64BIT_PML4:
546 {
547 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
548 /*
549 * Hopefully this doesn't happen very often:
550 * - messing with the bits of pd pointers without changing the physical address
551 */
552 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
553 const unsigned iShw = off / sizeof(X86PDPE);
554 if (uShw.pPML4->a[iShw].n.u1Present)
555 {
556 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
557 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
558 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
559 }
560 /* paranoia / a bit assumptive. */
561 if ( (off & 7)
562 && (off & 7) + cbWrite > sizeof(X86PDPE))
563 {
564 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
565 if (uShw.pPML4->a[iShw2].n.u1Present)
566 {
567 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
568 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
569 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
570 }
571 }
572 break;
573 }
574
575 default:
576 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
577 }
578 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
579
580 /* next */
581 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
582 return;
583 pPage = &pPool->aPages[pPage->iMonitoredNext];
584 }
585}
586
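
Editor's note: every case in the switch above derives the shadow entry index from the page offset of the guest write and, for unaligned writes, checks whether the write also spills into the following entry (the "paranoia" blocks). The stand-alone sketch below captures that arithmetic for an 8-byte PAE entry; the names are invented for illustration.

#include <stdint.h>

/* Given the page offset and size of a guest write, compute the first
 * affected 8-byte entry and, if the write is unaligned and long enough
 * to cross an entry boundary, the last affected entry as well. */
static void SketchAffectedPaeEntries(unsigned offWrite, unsigned cbWrite,
                                     unsigned *piFirst, unsigned *piLast)
{
    const unsigned cbEntry = 8;                              /* sizeof(X86PTEPAE) / sizeof(X86PDEPAE) */
    *piFirst = offWrite / cbEntry;                           /* entry holding the first written byte */
    if (   (offWrite & (cbEntry - 1))                        /* unaligned start ... */
        && (offWrite & (cbEntry - 1)) + cbWrite > cbEntry)   /* ... that crosses into the next entry */
        *piLast = (offWrite + cbWrite - 1) / cbEntry;        /* entry holding the last written byte */
    else
        *piLast = *piFirst;                                  /* everything stayed inside one entry */
}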
587#ifndef IN_RING3
588
589/**
590 * Checks if an access could be a fork operation in progress.
591 *
592 * Meaning that the guest is setting up the parent process for Copy-On-Write.
593 *
594 * @returns true if it's likely that we're forking, otherwise false.
595 * @param pPool The pool.
596 * @param pDis The disassembled instruction.
597 * @param offFault The access offset.
598 */
599DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
600{
601 /*
602 * i386 linux is using btr to clear X86_PTE_RW.
603 * The functions involved are (2.6.16 source inspection):
604 * clear_bit
605 * ptep_set_wrprotect
606 * copy_one_pte
607 * copy_pte_range
608 * copy_pmd_range
609 * copy_pud_range
610 * copy_page_range
611 * dup_mmap
612 * dup_mm
613 * copy_mm
614 * copy_process
615 * do_fork
616 */
617 if ( pDis->pCurInstr->uOpcode == OP_BTR
618 && !(offFault & 4)
619 /** @todo Validate that the bit index is X86_PTE_RW. */
620 )
621 {
622 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
623 return true;
624 }
625 return false;
626}
627
628
629/**
630 * Determine whether the page is likely to have been reused.
631 *
632 * @returns true if we consider the page as being reused for a different purpose.
633 * @returns false if we consider it to still be a paging page.
634 * @param pVM The cross context VM structure.
635 * @param pVCpu The cross context virtual CPU structure.
636 * @param pRegFrame Trap register frame.
637 * @param pDis The disassembly info for the faulting instruction.
638 * @param pvFault The fault address.
639 * @param pPage The pool page being accessed.
640 *
641 * @remark The REP prefix check is left to the caller because of STOSD/W.
642 */
643DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
644 PPGMPOOLPAGE pPage)
645{
646 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
647 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
648 if (pPage->cLocked)
649 {
650 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused, because it's locked!\n", pvFault, pPage));
651 return false;
652 }
653
654 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
655 if ( HMHasPendingIrq(pVM)
656 && pRegFrame->rsp - pvFault < 32)
657 {
658 /* Fault caused by stack writes while trying to inject an interrupt event. */
659 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
660 return true;
661 }
662
663 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
664
665 /* Non-supervisor mode write means it's used for something else. */
666 if (CPUMGetGuestCPL(pVCpu) == 3)
667 return true;
668
669 switch (pDis->pCurInstr->uOpcode)
670 {
671 /* call implies the actual push of the return address faulted */
672 case OP_CALL:
673 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
674 return true;
675 case OP_PUSH:
676 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
677 return true;
678 case OP_PUSHF:
679 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
680 return true;
681 case OP_PUSHA:
682 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
683 return true;
684 case OP_FXSAVE:
685 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
686 return true;
687 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
688 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
689 return true;
690 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
691 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
692 return true;
693 case OP_MOVSWD:
694 case OP_STOSWD:
695 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
696 && pRegFrame->rcx >= 0x40
697 )
698 {
699 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
700
701 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
702 return true;
703 }
704 break;
705
706 default:
707 /*
708 * Anything having ESP on the left side means stack writes.
709 */
710 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
711 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
712 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
713 {
714 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
715 return true;
716 }
717 break;
718 }
719
720 /*
721 * Page table updates are very unlikely to be crossing page boundaries,
722 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
723 */
724 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
725 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
726 {
727 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
728 return true;
729 }
730
731 /*
732 * Nobody does an unaligned 8-byte write to a page table, right?
733 */
734 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
735 {
736 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
737 return true;
738 }
739
740 return false;
741}
742
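
Editor's note: the last two heuristics above are purely arithmetic: a write whose byte range does not stay within one 4 KiB page, or an 8-byte-or-larger write that is not 8-byte aligned, is taken as evidence the page is no longer used as a page table. A stand-alone restatement of those two tests (editor's sketch; it assumes X86_PAGE_SHIFT is 12 and uses invented function names):

#include <stdint.h>

/* Mirrors the page-crossing test above: compare the page numbers of the
 * first byte and of the byte just past the write. */
static int SketchWriteLeavesPage(uintptr_t uPtr, uint32_t cbWrite)
{
    return ((uPtr + cbWrite) >> 12) != (uPtr >> 12);
}

/* Mirrors the alignment test above: page-table entries are written with
 * naturally aligned stores, so a misaligned 8+ byte write looks like reuse. */
static int SketchMisaligned8ByteWrite(uintptr_t uPtr, uint32_t cbWrite)
{
    return cbWrite >= 8 && (uPtr & 7) != 0;
}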
743
744/**
745 * Flushes the page being accessed.
746 *
747 * @returns VBox status code suitable for scheduling.
748 * @param pVM The cross context VM structure.
749 * @param pVCpu The cross context virtual CPU structure.
750 * @param pPool The pool.
751 * @param pPage The pool page (head).
752 * @param pDis The disassembly of the write instruction.
753 * @param pRegFrame The trap register frame.
754 * @param GCPhysFault The fault address as guest physical address.
755 * @param pvFault The fault address.
756 * @todo VBOXSTRICTRC
757 */
758static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
759 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
760{
761 NOREF(pVM); NOREF(GCPhysFault);
762
763 /*
764 * First, do the flushing.
765 */
766 pgmPoolMonitorChainFlush(pPool, pPage);
767
768 /*
769 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
770 * Must do this in raw mode (!); XP boot will fail otherwise.
771 */
772 int rc = VINF_SUCCESS;
773 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
774 if (rc2 == VINF_SUCCESS)
775 { /* do nothing */ }
776 else if (rc2 == VINF_EM_RESCHEDULE)
777 {
778 rc = VBOXSTRICTRC_VAL(rc2);
779# ifndef IN_RING3
780 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
781# endif
782 }
783 else if (rc2 == VERR_EM_INTERPRETER)
784 {
785 rc = VINF_EM_RAW_EMULATE_INSTR;
786 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
787 }
788 else if (RT_FAILURE_NP(rc2))
789 rc = VBOXSTRICTRC_VAL(rc2);
790 else
791 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
792
793 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
794 return rc;
795}
796
797
798/**
799 * Handles the STOSD write accesses.
800 *
801 * @returns VBox status code suitable for scheduling.
802 * @param pVM The cross context VM structure.
803 * @param pPool The pool.
804 * @param pPage The pool page (head).
805 * @param pDis The disassembly of the write instruction.
806 * @param pRegFrame The trap register frame.
807 * @param GCPhysFault The fault address as guest physical address.
808 * @param pvFault The fault address.
809 */
810DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
811 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
812{
813 unsigned uIncrement = pDis->Param1.cb;
814 NOREF(pVM);
815
816 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
817 Assert(pRegFrame->rcx <= 0x20);
818
819# ifdef VBOX_STRICT
820 if (pDis->uOpMode == DISCPUMODE_32BIT)
821 Assert(uIncrement == 4);
822 else
823 Assert(uIncrement == 8);
824# endif
825
826 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
827
828 /*
829 * Increment the modification counter and insert it into the list
830 * of modified pages the first time.
831 */
832 if (!pPage->cModifications++)
833 pgmPoolMonitorModifiedInsert(pPool, pPage);
834
835 /*
836 * Execute REP STOSD.
837 *
838 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
839 * write situation, meaning that it's safe to write here.
840 */
841 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
842 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
843 while (pRegFrame->rcx)
844 {
845# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
846 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
847 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
848 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
849# else
850 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
851# endif
852 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
853 pu32 += uIncrement;
854 GCPhysFault += uIncrement;
855 pRegFrame->rdi += uIncrement;
856 pRegFrame->rcx--;
857 }
858 pRegFrame->rip += pDis->cbInstr;
859
860 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
861 return VINF_SUCCESS;
862}
863
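
Editor's note: the loop above re-implements the architectural effect of REP STOSD/STOSQ with the direction flag clear: store the accumulator at the destination, advance the destination (and the monitored fault address) by the operand size, and decrement RCX until it reaches zero, then step RIP past the instruction. A compact stand-alone sketch of that register update, with an invented store callback standing in for PGMPhysSimpleWriteGCPhys and the per-iteration monitoring omitted:

#include <stdint.h>

typedef void (*SKETCHSTOREFN)(uint64_t GCPhysDst, uint64_t uValue, unsigned cb);

/* Emulate REP STOS with DF=0: rcx counts stores, rdi and rip advance. */
static void SketchRepStos(uint64_t *pRdi, uint64_t *pRcx, uint64_t *pRip,
                          uint64_t uRax, uint64_t GCPhysDst,
                          unsigned cbUnit, unsigned cbInstr, SKETCHSTOREFN pfnStore)
{
    while (*pRcx)
    {
        pfnStore(GCPhysDst, uRax, cbUnit);
        GCPhysDst += cbUnit;
        *pRdi     += cbUnit;
        *pRcx     -= 1;
    }
    *pRip += cbInstr;
}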
864
865/**
866 * Handles the simple write accesses.
867 *
868 * @returns VBox status code suitable for scheduling.
869 * @param pVM The cross context VM structure.
870 * @param pVCpu The cross context virtual CPU structure.
871 * @param pPool The pool.
872 * @param pPage The pool page (head).
873 * @param pDis The disassembly of the write instruction.
874 * @param pRegFrame The trap register frame.
875 * @param GCPhysFault The fault address as guest physical address.
876 * @param pvFault The fault address.
877 * @param pfReused Reused state (in/out)
878 */
879DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
880 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
881{
882 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
883 NOREF(pVM);
884 NOREF(pfReused); /* initialized by caller */
885
886 /*
887 * Increment the modification counter and insert it into the list
888 * of modified pages the first time.
889 */
890 if (!pPage->cModifications++)
891 pgmPoolMonitorModifiedInsert(pPool, pPage);
892
893 /*
894 * Clear all the pages. ASSUMES that pvFault is readable.
895 */
896# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
897 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
898# endif
899
900 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
901 if (cbWrite <= 8)
902 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
903 else if (cbWrite <= 16)
904 {
905 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
906 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
907 }
908 else
909 {
910 Assert(cbWrite <= 32);
911 for (uint32_t off = 0; off < cbWrite; off += 8)
912 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
913 }
914
915# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
916 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
917# endif
918
919 /*
920 * Interpret the instruction.
921 */
922 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
923 if (RT_SUCCESS(rc))
924 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
925 else if (rc == VERR_EM_INTERPRETER)
926 {
927 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
928 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
929 rc = VINF_EM_RAW_EMULATE_INSTR;
930 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
931 }
932
933# if 0 /* experimental code */
934 if (rc == VINF_SUCCESS)
935 {
936 switch (pPage->enmKind)
937 {
938 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
939 {
940 X86PTEPAE GstPte;
941 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
942 AssertRC(rc);
943
944 /* Check the new value written by the guest. If present and with a bogus physical address, then
945 * it's fairly safe to assume the guest is reusing the PT.
946 */
947 if (GstPte.n.u1Present)
948 {
949 RTHCPHYS HCPhys = -1;
950 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
951 if (rc != VINF_SUCCESS)
952 {
953 *pfReused = true;
954 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
955 }
956 }
957 break;
958 }
959 }
960 }
961# endif
962
963 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
964 return VBOXSTRICTRC_VAL(rc);
965}
966
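
Editor's note: the handler above never feeds pgmPoolMonitorChainChanging more than 8 bytes at a time; writes of up to 32 bytes are walked in 8-byte chunks. The stand-alone sketch below shows the equivalent chunking loop with an invented callback type.

#include <stdint.h>

typedef void (*SKETCHCHUNKFN)(uint64_t GCPhysFault, uint32_t cbChunk);

/* Split a write of up to 32 bytes into pieces of at most 8 bytes. */
static void SketchSplitWrite(SKETCHCHUNKFN pfnChunk, uint64_t GCPhysFault, uint32_t cbWrite)
{
    for (uint32_t off = 0; off < cbWrite; off += 8)
    {
        uint32_t cbChunk = cbWrite - off;
        pfnChunk(GCPhysFault + off, cbChunk > 8 ? 8 : cbChunk);
    }
}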
967
968/**
969 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
970 * \#PF access handler callback for page table pages.}
971 *
972 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
973 */
974DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
975 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
976{
977 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
978 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
979 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
980 unsigned cMaxModifications;
981 bool fForcedFlush = false;
982 NOREF(uErrorCode);
983
984 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
985
986 pgmLock(pVM);
987 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
988 {
989 /* Pool page changed while we were waiting for the lock; ignore. */
990 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
991 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
992 pgmUnlock(pVM);
993 return VINF_SUCCESS;
994 }
995# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
996 if (pPage->fDirty)
997 {
998 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
999 pgmUnlock(pVM);
1000 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1001 }
1002# endif
1003
1004# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1005 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1006 {
1007 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1008 void *pvGst;
1009 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1010 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1011 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1012 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1013 }
1014# endif
1015
1016 /*
1017 * Disassemble the faulting instruction.
1018 */
1019 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1020 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1021 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1022 {
1023 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1024 pgmUnlock(pVM);
1025 return rc;
1026 }
1027
1028 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1029
1030 /*
1031 * We should ALWAYS have the list head as user parameter. This
1032 * is because we use that page to record the changes.
1033 */
1034 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1035
1036# ifdef IN_RING0
1037 /* Maximum nr of modifications depends on the page type. */
1038 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1039 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1040 cMaxModifications = 4;
1041 else
1042 cMaxModifications = 24;
1043# else
1044 cMaxModifications = 48;
1045# endif
1046
1047 /*
1048 * Incremental page table updates should weigh more than random ones.
1049 * (Only applies when started from offset 0)
1050 */
1051 pVCpu->pgm.s.cPoolAccessHandler++;
1052 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1053 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1054 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1055 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1056 {
1057 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1058 Assert(pPage->cModifications < 32000);
1059 pPage->cModifications = pPage->cModifications * 2;
1060 pPage->GCPtrLastAccessHandlerFault = pvFault;
1061 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1062 if (pPage->cModifications >= cMaxModifications)
1063 {
1064 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1065 fForcedFlush = true;
1066 }
1067 }
1068
1069 if (pPage->cModifications >= cMaxModifications)
1070 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1071
1072 /*
1073 * Check if it's worth dealing with.
1074 */
1075 bool fReused = false;
1076 bool fNotReusedNotForking = false;
1077 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1078 || pgmPoolIsPageLocked(pPage)
1079 )
1080 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1081 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1082 {
1083 /*
1084 * Simple instructions, no REP prefix.
1085 */
1086 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1087 {
1088 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1089 if (fReused)
1090 goto flushPage;
1091
1092 /* A mov instruction to change the first page table entry will be remembered so we can detect
1093 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1094 */
1095 if ( rc == VINF_SUCCESS
1096 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1097 && pDis->pCurInstr->uOpcode == OP_MOV
1098 && (pvFault & PAGE_OFFSET_MASK) == 0)
1099 {
1100 pPage->GCPtrLastAccessHandlerFault = pvFault;
1101 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1102 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1103 /* Make sure we don't kick out a page too quickly. */
1104 if (pPage->cModifications > 8)
1105 pPage->cModifications = 2;
1106 }
1107 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1108 {
1109 /* ignore the 2nd write to this page table entry. */
1110 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1111 }
1112 else
1113 {
1114 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1115 pPage->GCPtrLastAccessHandlerRip = 0;
1116 }
1117
1118 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1119 pgmUnlock(pVM);
1120 return rc;
1121 }
1122
1123 /*
1124 * Windows is frequently doing small memset() operations (netio test 4k+).
1125 * We have to deal with these or we'll kill the cache and performance.
1126 */
1127 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1128 && !pRegFrame->eflags.Bits.u1DF
1129 && pDis->uOpMode == pDis->uCpuMode
1130 && pDis->uAddrMode == pDis->uCpuMode)
1131 {
1132 bool fValidStosd = false;
1133
1134 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1135 && pDis->fPrefix == DISPREFIX_REP
1136 && pRegFrame->ecx <= 0x20
1137 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1138 && !((uintptr_t)pvFault & 3)
1139 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1140 )
1141 {
1142 fValidStosd = true;
1143 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1144 }
1145 else
1146 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1147 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1148 && pRegFrame->rcx <= 0x20
1149 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1150 && !((uintptr_t)pvFault & 7)
1151 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1152 )
1153 {
1154 fValidStosd = true;
1155 }
1156
1157 if (fValidStosd)
1158 {
1159 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1160 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1161 pgmUnlock(pVM);
1162 return rc;
1163 }
1164 }
1165
1166 /* REP prefix, don't bother. */
1167 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1168 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1169 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1170 fNotReusedNotForking = true;
1171 }
1172
1173# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1174 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1175 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1176 */
1177 if ( pPage->cModifications >= cMaxModifications
1178 && !fForcedFlush
1179 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1180 && ( fNotReusedNotForking
1181 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1182 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1183 )
1184 )
1185 {
1186 Assert(!pgmPoolIsPageLocked(pPage));
1187 Assert(pPage->fDirty == false);
1188
1189 /* Flush any monitored duplicates as we will disable write protection. */
1190 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1191 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1192 {
1193 PPGMPOOLPAGE pPageHead = pPage;
1194
1195 /* Find the monitor head. */
1196 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1197 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1198
1199 while (pPageHead)
1200 {
1201 unsigned idxNext = pPageHead->iMonitoredNext;
1202
1203 if (pPageHead != pPage)
1204 {
1205 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1206 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1207 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1208 AssertRC(rc2);
1209 }
1210
1211 if (idxNext == NIL_PGMPOOL_IDX)
1212 break;
1213
1214 pPageHead = &pPool->aPages[idxNext];
1215 }
1216 }
1217
1218 /* The flushing above might fail for locked pages, so double check. */
1219 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1220 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1221 {
1222 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1223
1224 /* Temporarily allow write access to the page table again. */
1225 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1226 if (rc == VINF_SUCCESS)
1227 {
1228 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1229 AssertMsg(rc == VINF_SUCCESS
1230 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1231 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1232 || rc == VERR_PAGE_NOT_PRESENT,
1233 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1234# ifdef VBOX_STRICT
1235 pPage->GCPtrDirtyFault = pvFault;
1236# endif
1237
1238 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1239 pgmUnlock(pVM);
1240 return rc;
1241 }
1242 }
1243 }
1244# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1245
1246 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1247flushPage:
1248 /*
1249 * Not worth it, so flush it.
1250 *
1251 * If we considered it to be reused, don't go back to ring-3
1252 * to emulate failed instructions since we usually cannot
1253 * interpret them. This may be a bit risky, in which case
1254 * the reuse detection must be fixed.
1255 */
1256 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1257 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1258 && fReused)
1259 {
1260 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1261 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1262 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1263 }
1264 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1265 pgmUnlock(pVM);
1266 return rc;
1267}
1268
1269#endif /* !IN_RING3 */
1270
1271/**
1272 * @callback_method_impl{FNPGMPHYSHANDLER,
1273 * Access handler for shadowed page table pages.}
1274 *
1275 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1276 */
1277PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1278pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1279 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1280{
1281 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1282 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1283 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1284 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1285 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1286
1287 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1288
1289 pgmLock(pVM);
1290
1291#ifdef VBOX_WITH_STATISTICS
1292 /*
1293 * Collect stats on the access.
1294 */
1295 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1296 if (cbBuf <= 16 && cbBuf > 0)
1297 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1298 else if (cbBuf >= 17 && cbBuf < 32)
1299 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1300 else if (cbBuf >= 32 && cbBuf < 64)
1301 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1302 else if (cbBuf >= 64)
1303 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1304
1305 uint8_t cbAlign;
1306 switch (pPage->enmKind)
1307 {
1308 default:
1309 cbAlign = 7;
1310 break;
1311 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1312 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1313 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1314 case PGMPOOLKIND_32BIT_PD:
1315 case PGMPOOLKIND_32BIT_PD_PHYS:
1316 cbAlign = 3;
1317 break;
1318 }
1319 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1320 if ((uint8_t)GCPhys & cbAlign)
1321 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1322#endif
1323
1324 /*
1325 * Make sure the pool page wasn't modified by a different CPU.
1326 */
1327 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1328 {
1329 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1330
1331 /* The max modification count before flushing depends on the context and page type. */
1332#ifdef IN_RING3
1333 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1334#else
1335 uint16_t cMaxModifications;
1336 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1337 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1338 cMaxModifications = 4;
1339 else
1340 cMaxModifications = 24;
1341#endif
1342
1343 /*
1344 * We don't have to be very sophisticated about this since there are relatively few calls here.
1345 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1346 */
1347 if ( ( pPage->cModifications < cMaxModifications
1348 || pgmPoolIsPageLocked(pPage) )
1349 && enmOrigin != PGMACCESSORIGIN_DEVICE
1350 && cbBuf <= 16)
1351 {
1352 /* Clear the shadow entry. */
1353 if (!pPage->cModifications++)
1354 pgmPoolMonitorModifiedInsert(pPool, pPage);
1355
1356 if (cbBuf <= 8)
1357 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1358 else
1359 {
1360 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1361 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1362 }
1363 }
1364 else
1365 pgmPoolMonitorChainFlush(pPool, pPage);
1366
1367 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1368 }
1369 else
1370 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1371 pgmUnlock(pVM);
1372 return VINF_PGM_HANDLER_DO_DEFAULT;
1373}
1374
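
Editor's note: the statistics block in the handler above buckets write sizes into 19 counters: sizes 1 through 16 bytes each get their own bucket, and 17-31, 32-63 and 64+ bytes share three overflow buckets. A stand-alone sketch of that mapping (editor's illustration; it returns -1 for a zero-length access, which the original simply does not count):

#include <stddef.h>

/* Map a write size to its statistics bucket, 19 buckets in total. */
static int SketchSizeBucket(size_t cbBuf)
{
    if (cbBuf == 0)   return -1;               /* not counted */
    if (cbBuf <= 16)  return (int)cbBuf - 1;   /* buckets 0..15: exact sizes 1..16 */
    if (cbBuf < 32)   return 16;               /* 17..31 bytes */
    if (cbBuf < 64)   return 17;               /* 32..63 bytes */
    return 18;                                 /* 64 bytes and up */
}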
1375
1376#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1377
1378# if defined(VBOX_STRICT) && !defined(IN_RING3)
1379
1380/**
1381 * Check references to guest physical memory in a PAE / PAE page table.
1382 *
1383 * @param pPool The pool.
1384 * @param pPage The page.
1385 * @param pShwPT The shadow page table (mapping of the page).
1386 * @param pGstPT The guest page table.
1387 */
1388static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1389{
1390 unsigned cErrors = 0;
1391 int LastRc = -1; /* initialized to shut up gcc */
1392 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1393 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1394 PVMCC pVM = pPool->CTX_SUFF(pVM);
1395
1396# ifdef VBOX_STRICT
1397 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1398 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1399# endif
1400 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1401 {
1402 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1403 {
1404 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1405 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1406 if ( rc != VINF_SUCCESS
1407 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1408 {
1409 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1410 LastPTE = i;
1411 LastRc = rc;
1412 LastHCPhys = HCPhys;
1413 cErrors++;
1414
1415 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1416 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1417 AssertRC(rc);
1418
1419 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1420 {
1421 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1422
1423 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1424 {
1425 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1426
1427 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1428 {
1429 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1430 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1431 {
1432 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1433 }
1434 }
1435
1436 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1437 }
1438 }
1439 }
1440 }
1441 }
1442 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1443}
1444
1445
1446/**
1447 * Check references to guest physical memory in a PAE / 32-bit page table.
1448 *
1449 * @param pPool The pool.
1450 * @param pPage The page.
1451 * @param pShwPT The shadow page table (mapping of the page).
1452 * @param pGstPT The guest page table.
1453 */
1454static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1455{
1456 unsigned cErrors = 0;
1457 int LastRc = -1; /* initialized to shut up gcc */
1458 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1459 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1460 PVMCC pVM = pPool->CTX_SUFF(pVM);
1461
1462# ifdef VBOX_STRICT
1463 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1464 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1465# endif
1466 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1467 {
1468 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1469 {
1470 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1471 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1472 if ( rc != VINF_SUCCESS
1473 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1474 {
1475 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1476 LastPTE = i;
1477 LastRc = rc;
1478 LastHCPhys = HCPhys;
1479 cErrors++;
1480
1481 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1482 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1483 AssertRC(rc);
1484
1485 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1486 {
1487 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1488
1489 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1490 {
1491 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1492
1493 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1494 {
1495 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1496 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1497 {
1498 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1499 }
1500 }
1501
1502 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1503 }
1504 }
1505 }
1506 }
1507 }
1508 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1509}
1510
1511# endif /* VBOX_STRICT && !IN_RING3 */
1512
1513/**
1514 * Clear references to guest physical memory in a PAE / PAE page table.
1515 *
1516 * @returns nr of changed PTEs
1517 * @param pPool The pool.
1518 * @param pPage The page.
1519 * @param pShwPT The shadow page table (mapping of the page).
1520 * @param pGstPT The guest page table.
1521 * @param pOldGstPT The old cached guest page table.
1522 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1523 * @param pfFlush Flush reused page table (out)
1524 */
1525DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1526 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1527{
1528 unsigned cChanged = 0;
1529
1530# ifdef VBOX_STRICT
1531 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1532 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1533# endif
1534 *pfFlush = false;
1535
1536 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1537 {
1538 /* Check the new value written by the guest. If present and with a bogus physical address, then
1539 * it's fairly safe to assume the guest is reusing the PT.
1540 */
1541 if ( fAllowRemoval
1542 && pGstPT->a[i].n.u1Present)
1543 {
1544 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1545 {
1546 *pfFlush = true;
1547 return ++cChanged;
1548 }
1549 }
1550 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1551 {
1552 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1553 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1554 {
1555# ifdef VBOX_STRICT
1556 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1557 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1558 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1559# endif
1560 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1561 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1562 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1563 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1564
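                /* Keep the shadow entry only if the attributes also match and the shadow is not more writable than the guest; otherwise fall through and re-sync it. */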
1565 if ( uHostAttr == uGuestAttr
1566 && fHostRW <= fGuestRW)
1567 continue;
1568 }
1569 cChanged++;
1570 /* Something was changed, so flush it. */
1571 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1572 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1573 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1574 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1575 }
1576 }
1577 return cChanged;
1578}
1579
1580
1581/**
1582 * Clear references to guest physical memory in a PAE / 32-bit page table.
1583 *
1584 * @returns nr of changed PTEs
1585 * @param pPool The pool.
1586 * @param pPage The page.
1587 * @param pShwPT The shadow page table (mapping of the page).
1588 * @param pGstPT The guest page table.
1589 * @param pOldGstPT The old cached guest page table.
1590 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1591 * @param pfFlush Flush reused page table (out)
1592 */
1593DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1594 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1595{
1596 unsigned cChanged = 0;
1597
1598# ifdef VBOX_STRICT
1599 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1600 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1601# endif
1602 *pfFlush = false;
1603
1604 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1605 {
1606 /* Check the new value written by the guest. If present and with a bogus physical address, then
1607 * it's fairly safe to assume the guest is reusing the PT.
1608 */
1609 if ( fAllowRemoval
1610 && pGstPT->a[i].n.u1Present)
1611 {
1612 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1613 {
1614 *pfFlush = true;
1615 return ++cChanged;
1616 }
1617 }
1618 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1619 {
1620 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1621 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1622 {
1623# ifdef VBOX_STRICT
1624                RTHCPHYS HCPhys = NIL_RTHCPHYS;
1625 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1626 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1627# endif
1628 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1629 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1630 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1631 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1632
1633 if ( uHostAttr == uGuestAttr
1634 && fHostRW <= fGuestRW)
1635 continue;
1636 }
1637 cChanged++;
1638 /* Something was changed, so flush it. */
1639 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1640 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1641 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1642 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1643 }
1644 }
1645 return cChanged;
1646}
1647
1648
1649/**
1650 * Flush a dirty page
1651 *
1652 * @param pVM The cross context VM structure.
1653 * @param pPool The pool.
1654 * @param idxSlot Dirty array slot index
1655 * @param fAllowRemoval Allow a reused page table to be removed
1656 */
1657static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1658{
1659 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1660
1661 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1662 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1663 if (idxPage == NIL_PGMPOOL_IDX)
1664 return;
1665
1666 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1667 Assert(pPage->idx == idxPage);
1668 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1669
1670 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1671 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1672
1673# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1674 PVMCPU pVCpu = VMMGetCpu(pVM);
1675 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1676# endif
1677
1678 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1679 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1680 Assert(rc == VINF_SUCCESS);
1681 pPage->fDirty = false;
1682
1683# ifdef VBOX_STRICT
1684 uint64_t fFlags = 0;
1685 RTHCPHYS HCPhys;
1686 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1687 AssertMsg( ( rc == VINF_SUCCESS
1688 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1689 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1690 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1691 || rc == VERR_PAGE_NOT_PRESENT,
1692 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1693# endif
1694
1695 /* Flush those PTEs that have changed. */
1696 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1697 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1698 void *pvGst;
1699 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1700 bool fFlush;
1701 unsigned cChanges;
1702
1703 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1704 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1705 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1706 else
1707 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1708 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1709
1710 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1711 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1712 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1713 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1714
1715 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1716 Assert(pPage->cModifications);
1717 if (cChanges < 4)
1718 pPage->cModifications = 1; /* must use > 0 here */
1719 else
1720 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1721
1722 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
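    /* If the dirty array was full, the slot we have just flushed becomes the next free one. */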
1723 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1724 pPool->idxFreeDirtyPage = idxSlot;
1725
1726 pPool->cDirtyPages--;
1727 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1728 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1729 if (fFlush)
1730 {
1731 Assert(fAllowRemoval);
1732 Log(("Flush reused page table!\n"));
1733 pgmPoolFlushPage(pPool, pPage);
1734 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1735 }
1736 else
1737 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1738
1739# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1740 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1741# endif
1742}
1743
1744
1745# ifndef IN_RING3
1746/**
1747 * Add a new dirty page
1748 *
1749 * @param pVM The cross context VM structure.
1750 * @param pPool The pool.
1751 * @param pPage The page.
1752 */
1753void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1754{
1755 PGM_LOCK_ASSERT_OWNER(pVM);
1756 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1757 Assert(!pPage->fDirty);
1758
1759 unsigned idxFree = pPool->idxFreeDirtyPage;
1760 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1761 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1762
1763 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1764 {
1765 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1766 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1767 }
1768 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1769 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1770
1771 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1772
1773 /*
1774 * Make a copy of the guest page table as we require valid GCPhys addresses
1775 * when removing references to physical pages.
1776 * (The HCPhys linear lookup is *extremely* expensive!)
1777 */
1778 void *pvGst;
1779 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1780 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1781# ifdef VBOX_STRICT
1782 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1783 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1784 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1785 else
1786 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1787 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1788# endif
1789 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1790
1791 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1792 pPage->fDirty = true;
1793 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1794 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1795 pPool->cDirtyPages++;
1796
1797 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
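    /* If the next round-robin slot is already occupied, search the whole array for a free one. */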
1798 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1799 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1800 {
1801 unsigned i;
1802 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1803 {
1804 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1805 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1806 {
1807 pPool->idxFreeDirtyPage = idxFree;
1808 break;
1809 }
1810 }
1811 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1812 }
1813
1814 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1815
1816 /*
1817 * Clear all references to this shadow table. See @bugref{7298}.
1818 */
1819 pgmPoolTrackClearPageUsers(pPool, pPage);
1820}
1821# endif /* !IN_RING3 */
1822
1823
1824/**
1825 * Check if the specified page is dirty (not write monitored)
1826 *
1827 * @returns true if the page is dirty, false if not.
1828 * @param pVM The cross context VM structure.
1829 * @param GCPhys Guest physical address
1830 */
1831bool pgmPoolIsDirtyPageSlow(PVM pVM, RTGCPHYS GCPhys)
1832{
1833 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1834 PGM_LOCK_ASSERT_OWNER(pVM);
1835 if (!pPool->cDirtyPages)
1836 return false;
1837
1838 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1839
1840 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1841 {
1842 unsigned idxPage = pPool->aidxDirtyPages[i];
1843 if (idxPage != NIL_PGMPOOL_IDX)
1844 {
1845 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1846 if (pPage->GCPhys == GCPhys)
1847 return true;
1848 }
1849 }
1850 return false;
1851}
1852
1853
1854/**
1855 * Reset all dirty pages by reinstating page monitoring.
1856 *
1857 * @param pVM The cross context VM structure.
1858 */
1859void pgmPoolResetDirtyPages(PVMCC pVM)
1860{
1861 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1862 PGM_LOCK_ASSERT_OWNER(pVM);
1863 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1864
1865 if (!pPool->cDirtyPages)
1866 return;
1867
1868 Log(("pgmPoolResetDirtyPages\n"));
1869 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1870 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1871
1872 pPool->idxFreeDirtyPage = 0;
1873 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1874 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1875 {
1876 unsigned i;
1877 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1878 {
1879 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1880 {
1881 pPool->idxFreeDirtyPage = i;
1882 break;
1883 }
1884 }
1885 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1886 }
1887
1888 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1889 return;
1890}
1891
1892
1893/**
1894 * Invalidate the PT entry for the specified page
1895 *
1896 * @param pVM The cross context VM structure.
1897 * @param GCPtrPage Guest page to invalidate
1898 */
1899void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1900{
1901 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1902 PGM_LOCK_ASSERT_OWNER(pVM);
1903 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1904
1905 if (!pPool->cDirtyPages)
1906 return;
1907
1908 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
1909 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1910 {
1911 /** @todo What was intended here??? This looks incomplete... */
1912 }
1913}
1914
1915
1916/**
1917 * Flush the dirty page being tracked for the specified guest page table, reinstating its page monitoring.
1918 *
1919 * @param pVM The cross context VM structure.
1920 * @param GCPhysPT Physical address of the page table
1921 */
1922void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
1923{
1924 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1925 PGM_LOCK_ASSERT_OWNER(pVM);
1926 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1927 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1928
1929 if (!pPool->cDirtyPages)
1930 return;
1931
1932 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1933
1934 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1935 {
1936 unsigned idxPage = pPool->aidxDirtyPages[i];
1937 if (idxPage != NIL_PGMPOOL_IDX)
1938 {
1939 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1940 if (pPage->GCPhys == GCPhysPT)
1941 {
1942 idxDirtyPage = i;
1943 break;
1944 }
1945 }
1946 }
1947
1948 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1949 {
1950 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1951 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1952 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1953 {
1954 unsigned i;
1955 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1956 {
1957 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1958 {
1959 pPool->idxFreeDirtyPage = i;
1960 break;
1961 }
1962 }
1963 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1964 }
1965 }
1966}
1967
1968#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1969
1970/**
1971 * Inserts a page into the GCPhys hash table.
1972 *
1973 * @param pPool The pool.
1974 * @param pPage The page.
1975 */
1976DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1977{
1978 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1979 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1980 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1981 pPage->iNext = pPool->aiHash[iHash];
1982 pPool->aiHash[iHash] = pPage->idx;
1983}
1984
1985
1986/**
1987 * Removes a page from the GCPhys hash table.
1988 *
1989 * @param pPool The pool.
1990 * @param pPage The page.
1991 */
1992DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1993{
1994 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1995 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1996 if (pPool->aiHash[iHash] == pPage->idx)
1997 pPool->aiHash[iHash] = pPage->iNext;
1998 else
1999 {
2000 uint16_t iPrev = pPool->aiHash[iHash];
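        /* Not the head of the hash chain; walk the chain and unlink the page. */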
2001 for (;;)
2002 {
2003 const int16_t i = pPool->aPages[iPrev].iNext;
2004 if (i == pPage->idx)
2005 {
2006 pPool->aPages[iPrev].iNext = pPage->iNext;
2007 break;
2008 }
2009 if (i == NIL_PGMPOOL_IDX)
2010 {
2011 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2012 break;
2013 }
2014 iPrev = i;
2015 }
2016 }
2017 pPage->iNext = NIL_PGMPOOL_IDX;
2018}
2019
2020
2021/**
2022 * Frees up one cache page.
2023 *
2024 * @returns VBox status code.
2025 * @retval VINF_SUCCESS on success.
2026 * @param pPool The pool.
2027 * @param iUser The user index.
2028 * @param pszTmpCaller OS X debugging.
2029 */
2030static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser, const char *pszTmpCaller)
2031{
2032 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2033    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
2034 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2035
2036 /*
2037 * Select one page from the tail of the age list.
2038 */
2039 PPGMPOOLPAGE pPage;
2040 for (unsigned iLoop = 0; ; iLoop++)
2041 {
2042 uint16_t iToFree = pPool->iAgeTail;
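        /* Never pick the caller's own user table page; take its predecessor in the age list instead. */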
2043 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2044 iToFree = pPool->aPages[iToFree].iAgePrev;
2045/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2046 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2047 {
2048 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2049 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2050 {
2051 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2052 continue;
2053 iToFree = i;
2054 break;
2055 }
2056 }
2057*/
2058 Assert(iToFree != iUser);
2059 if (RT_LIKELY(iToFree != NIL_PGMPOOL_IDX)) /* Temporary OS X debugging */
2060 { /* likely */ }
2061 else
2062 {
2063 size_t cbPool = RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2064 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2065 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT);
2066 uint8_t *pbLastPage = (uint8_t *)pPool + ((cbPool - 1) & ~(uintptr_t)PAGE_OFFSET_MASK);
2067 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX, ("%s: iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p (LB %#zx):\n"
2068 "%.512Rhxd\n"
2069 "pLastPage=%p:\n"
2070 "%.4096Rhxd\n",
2071 pszTmpCaller, iToFree, pPool->iAgeTail, iUser, iLoop,
2072 pPool, cbPool, pPool, pbLastPage, pbLastPage));
2073 }
2074 pPage = &pPool->aPages[iToFree];
2075
2076 /*
2077 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2078 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2079 */
2080 if ( !pgmPoolIsPageLocked(pPage)
2081 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2082 break;
2083 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2084 pgmPoolCacheUsed(pPool, pPage);
2085 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2086 }
2087
2088 /*
2089 * Found a usable page, flush it and return.
2090 */
2091 int rc = pgmPoolFlushPage(pPool, pPage);
2092 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2093 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2094 if (rc == VINF_SUCCESS)
2095 PGM_INVL_ALL_VCPU_TLBS(pVM);
2096 return rc;
2097}
2098
2099
2100/**
2101 * Checks if a kind mismatch is really a page being reused
2102 * or if it's just normal remappings.
2103 *
2104 * @returns true if reused and the cached page (enmKind1) should be flushed
2105 * @returns false if not reused.
2106 * @param enmKind1 The kind of the cached page.
2107 * @param enmKind2 The kind of the requested page.
2108 */
2109static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2110{
2111 switch (enmKind1)
2112 {
2113 /*
2114 * Never reuse them. There is no remapping in non-paging mode.
2115 */
2116 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2117 case PGMPOOLKIND_32BIT_PD_PHYS:
2118 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2119 case PGMPOOLKIND_PAE_PD_PHYS:
2120 case PGMPOOLKIND_PAE_PDPT_PHYS:
2121 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2122 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2123 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2124 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2125 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2126 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2127 return false;
2128
2129 /*
2130 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2131 */
2132 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2133 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2134 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2135 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2136 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2137 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2138 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2139 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2140 case PGMPOOLKIND_32BIT_PD:
2141 case PGMPOOLKIND_PAE_PDPT:
2142 switch (enmKind2)
2143 {
2144 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2145 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2146 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2147 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2148 case PGMPOOLKIND_64BIT_PML4:
2149 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2150 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2151 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2152 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2153 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2154 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2155 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2156 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2157 return true;
2158 default:
2159 return false;
2160 }
2161
2162 /*
2163 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2164 */
2165 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2166 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2167 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2168 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2169 case PGMPOOLKIND_64BIT_PML4:
2170 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2171 switch (enmKind2)
2172 {
2173 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2174 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2175 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2176 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2177 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2178 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2179 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2180 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2181 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2182 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2183 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2184 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2185 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2186 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2187 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2188 return true;
2189 default:
2190 return false;
2191 }
2192
2193 /*
2194 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2195 */
2196 case PGMPOOLKIND_ROOT_NESTED:
2197 return false;
2198
2199 default:
2200 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2201 }
2202}
2203
2204
2205/**
2206 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2207 *
2208 * @returns VBox status code.
2209 * @retval VINF_PGM_CACHED_PAGE on success.
2210 * @retval VERR_FILE_NOT_FOUND if not found.
2211 * @param pPool The pool.
2212 * @param GCPhys The GC physical address of the page we're gonna shadow.
2213 * @param enmKind The kind of mapping.
2214 * @param enmAccess Access type for the mapping (only relevant for big pages)
2215 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2216 * @param iUser The shadow page pool index of the user table. This is
2217 * NIL_PGMPOOL_IDX for root pages.
2218 * @param iUserTable The index into the user table (shadowed). Ignored if
2219 * root page
2220 * @param ppPage Where to store the pointer to the page.
2221 */
2222static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2223 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2224{
2225 /*
2226 * Look up the GCPhys in the hash.
2227 */
2228 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2229 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2230 if (i != NIL_PGMPOOL_IDX)
2231 {
2232 do
2233 {
2234 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2235 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2236 if (pPage->GCPhys == GCPhys)
2237 {
2238 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2239 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2240 && pPage->fA20Enabled == fA20Enabled)
2241 {
2242 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2243 * doesn't flush it in case there are no more free use records.
2244 */
2245 pgmPoolCacheUsed(pPool, pPage);
2246
2247 int rc = VINF_SUCCESS;
2248 if (iUser != NIL_PGMPOOL_IDX)
2249 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2250 if (RT_SUCCESS(rc))
2251 {
2252 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2253 *ppPage = pPage;
2254 if (pPage->cModifications)
2255 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2256 STAM_COUNTER_INC(&pPool->StatCacheHits);
2257 return VINF_PGM_CACHED_PAGE;
2258 }
2259 return rc;
2260 }
2261
2262 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2263 {
2264 /*
2265 * The kind is different. In some cases we should now flush the page
2266 * as it has been reused, but in most cases this is normal remapping
2267 * of PDs as PT or big pages using the GCPhys field in a slightly
2268 * different way than the other kinds.
2269 */
2270 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2271 {
2272 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2273 pgmPoolFlushPage(pPool, pPage);
2274 break;
2275 }
2276 }
2277 }
2278
2279 /* next */
2280 i = pPage->iNext;
2281 } while (i != NIL_PGMPOOL_IDX);
2282 }
2283
2284 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2285 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2286 return VERR_FILE_NOT_FOUND;
2287}
2288
2289
2290/**
2291 * Inserts a page into the cache.
2292 *
2293 * @param pPool The pool.
2294 * @param pPage The cached page.
2295 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2296 */
2297static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2298{
2299 /*
2300 * Insert into the GCPhys hash if the page is fit for that.
2301 */
2302 Assert(!pPage->fCached);
2303 if (fCanBeCached)
2304 {
2305 pPage->fCached = true;
2306 pgmPoolHashInsert(pPool, pPage);
2307 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2308 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2309 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2310 }
2311 else
2312 {
2313 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2314 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2315 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2316 }
2317
2318 /*
2319 * Insert at the head of the age list.
2320 */
2321 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2322 pPage->iAgeNext = pPool->iAgeHead;
2323 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2324 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2325 else
2326 pPool->iAgeTail = pPage->idx;
2327 pPool->iAgeHead = pPage->idx;
2328}
2329
2330
2331/**
2332 * Flushes a cached page.
2333 *
2334 * @param pPool The pool.
2335 * @param pPage The cached page.
2336 */
2337static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2338{
2339 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2340
2341 /*
2342 * Remove the page from the hash.
2343 */
2344 if (pPage->fCached)
2345 {
2346 pPage->fCached = false;
2347 pgmPoolHashRemove(pPool, pPage);
2348 }
2349 else
2350 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2351
2352 /*
2353 * Remove it from the age list.
2354 */
2355 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2356 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2357 else
2358 pPool->iAgeTail = pPage->iAgePrev;
2359 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2360 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2361 else
2362 pPool->iAgeHead = pPage->iAgeNext;
2363 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2364 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2365}
2366
2367
2368/**
2369 * Looks for pages sharing the monitor.
2370 *
2371 * @returns Pointer to the head page.
2372 * @returns NULL if not found.
2373 * @param pPool The Pool
2374 * @param pNewPage The page which is going to be monitored.
2375 */
2376static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2377{
2378 /*
2379 * Look up the GCPhys in the hash.
2380 */
2381 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2382 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2383 if (i == NIL_PGMPOOL_IDX)
2384 return NULL;
2385 do
2386 {
2387 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2388 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2389 && pPage != pNewPage)
2390 {
2391 switch (pPage->enmKind)
2392 {
2393 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2394 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2395 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2396 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2397 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2398 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2399 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2400 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2401 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2402 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2403 case PGMPOOLKIND_64BIT_PML4:
2404 case PGMPOOLKIND_32BIT_PD:
2405 case PGMPOOLKIND_PAE_PDPT:
2406 {
2407 /* find the head */
2408 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2409 {
2410 Assert(pPage->iMonitoredPrev != pPage->idx);
2411 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2412 }
2413 return pPage;
2414 }
2415
2416 /* ignore, no monitoring. */
2417 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2418 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2419 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2420 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2421 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2422 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2423 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2424 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2425 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2426 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2427 case PGMPOOLKIND_ROOT_NESTED:
2428 case PGMPOOLKIND_PAE_PD_PHYS:
2429 case PGMPOOLKIND_PAE_PDPT_PHYS:
2430 case PGMPOOLKIND_32BIT_PD_PHYS:
2431 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2432 break;
2433 default:
2434 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2435 }
2436 }
2437
2438 /* next */
2439 i = pPage->iNext;
2440 } while (i != NIL_PGMPOOL_IDX);
2441 return NULL;
2442}
2443
2444
2445/**
2446 * Enables write monitoring of a guest page.
2447 *
2448 * @returns VBox status code.
2449 * @retval VINF_SUCCESS on success.
2450 * @param pPool The pool.
2451 * @param pPage The cached page.
2452 */
2453static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2454{
2455 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2456
2457 /*
2458 * Filter out the relevant kinds.
2459 */
2460 switch (pPage->enmKind)
2461 {
2462 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2463 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2464 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2465 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2466 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2467 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2468 case PGMPOOLKIND_64BIT_PML4:
2469 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2470 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2471 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2472 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2473 case PGMPOOLKIND_32BIT_PD:
2474 case PGMPOOLKIND_PAE_PDPT:
2475 break;
2476
2477 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2478 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2479 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2480 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2481 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2482 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2483 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2484 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2485 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2486 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2487 case PGMPOOLKIND_ROOT_NESTED:
2488 /* Nothing to monitor here. */
2489 return VINF_SUCCESS;
2490
2491 case PGMPOOLKIND_32BIT_PD_PHYS:
2492 case PGMPOOLKIND_PAE_PDPT_PHYS:
2493 case PGMPOOLKIND_PAE_PD_PHYS:
2494 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2495 /* Nothing to monitor here. */
2496 return VINF_SUCCESS;
2497 default:
2498 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2499 }
2500
2501 /*
2502 * Install handler.
2503 */
2504 int rc;
2505 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2506 if (pPageHead)
2507 {
2508 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2509 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2510
2511#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2512 if (pPageHead->fDirty)
2513 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2514#endif
2515
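        /* Another pool page already monitors this guest page; just link this page into the monitor chain after the head (the physical handler is already registered). */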
2516 pPage->iMonitoredPrev = pPageHead->idx;
2517 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2518 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2519 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2520 pPageHead->iMonitoredNext = pPage->idx;
2521 rc = VINF_SUCCESS;
2522 }
2523 else
2524 {
2525 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2526 PVMCC pVM = pPool->CTX_SUFF(pVM);
2527 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2528 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2529 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2530 NIL_RTR3PTR /*pszDesc*/);
2531 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2532 * the heap size should suffice. */
2533 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2534 PVMCPU pVCpu = VMMGetCpu(pVM);
2535 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2536 }
2537 pPage->fMonitored = true;
2538 return rc;
2539}
2540
2541
2542/**
2543 * Disables write monitoring of a guest page.
2544 *
2545 * @returns VBox status code.
2546 * @retval VINF_SUCCESS on success.
2547 * @param pPool The pool.
2548 * @param pPage The cached page.
2549 */
2550static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2551{
2552 /*
2553 * Filter out the relevant kinds.
2554 */
2555 switch (pPage->enmKind)
2556 {
2557 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2558 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2559 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2560 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2561 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2562 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2563 case PGMPOOLKIND_64BIT_PML4:
2564 case PGMPOOLKIND_32BIT_PD:
2565 case PGMPOOLKIND_PAE_PDPT:
2566 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2567 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2568 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2569 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2570 break;
2571
2572 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2573 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2574 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2575 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2576 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2577 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2578 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2579 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2580 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2581 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2582 case PGMPOOLKIND_ROOT_NESTED:
2583 case PGMPOOLKIND_PAE_PD_PHYS:
2584 case PGMPOOLKIND_PAE_PDPT_PHYS:
2585 case PGMPOOLKIND_32BIT_PD_PHYS:
2586 /* Nothing to monitor here. */
2587 Assert(!pPage->fMonitored);
2588 return VINF_SUCCESS;
2589
2590 default:
2591 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2592 }
2593 Assert(pPage->fMonitored);
2594
2595 /*
2596 * Remove the page from the monitored list or uninstall it if last.
2597 */
2598 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2599 int rc;
2600 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2601 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2602 {
2603 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2604 {
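            /* This page is the head of the monitor chain; make the next page the new head and re-point the handler's user arguments at it. */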
2605 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2606 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2607 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2608 MMHyperCCToR3(pVM, pNewHead), MMHyperCCToR0(pVM, pNewHead));
2609
2610 AssertFatalRCSuccess(rc);
2611 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2612 }
2613 else
2614 {
2615 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2616 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2617 {
2618 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2619 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2620 }
2621 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2622 rc = VINF_SUCCESS;
2623 }
2624 }
2625 else
2626 {
2627 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2628 AssertFatalRC(rc);
2629 PVMCPU pVCpu = VMMGetCpu(pVM);
2630 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2631 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2632 }
2633 pPage->fMonitored = false;
2634
2635 /*
2636 * Remove it from the list of modified pages (if in it).
2637 */
2638 pgmPoolMonitorModifiedRemove(pPool, pPage);
2639
2640 return rc;
2641}
2642
2643
2644/**
2645 * Inserts the page into the list of modified pages.
2646 *
2647 * @param pPool The pool.
2648 * @param pPage The page.
2649 */
2650void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2651{
2652 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2653 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2654 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2655 && pPool->iModifiedHead != pPage->idx,
2656 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2657 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2658 pPool->iModifiedHead, pPool->cModifiedPages));
2659
2660 pPage->iModifiedNext = pPool->iModifiedHead;
2661 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2662 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2663 pPool->iModifiedHead = pPage->idx;
2664 pPool->cModifiedPages++;
2665#ifdef VBOX_WITH_STATISTICS
2666 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2667 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2668#endif
2669}
2670
2671
2672/**
2673 * Removes the page from the list of modified pages and resets the
2674 * modification counter.
2675 *
2676 * @param pPool The pool.
2677 * @param pPage The page which is believed to be in the list of modified pages.
2678 */
2679static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2680{
2681 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2682 if (pPool->iModifiedHead == pPage->idx)
2683 {
2684 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2685 pPool->iModifiedHead = pPage->iModifiedNext;
2686 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2687 {
2688 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2689 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2690 }
2691 pPool->cModifiedPages--;
2692 }
2693 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2694 {
2695 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2696 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2697 {
2698 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2699 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2700 }
2701 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2702 pPool->cModifiedPages--;
2703 }
2704 else
2705 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2706 pPage->cModifications = 0;
2707}
2708
2709
2710/**
2711 * Zaps the list of modified pages, resetting their modification counters in the process.
2712 *
2713 * @param pVM The cross context VM structure.
2714 */
2715static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2716{
2717 pgmLock(pVM);
2718 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2719 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2720
2721 unsigned cPages = 0; NOREF(cPages);
2722
2723#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2724 pgmPoolResetDirtyPages(pVM);
2725#endif
2726
2727 uint16_t idx = pPool->iModifiedHead;
2728 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2729 while (idx != NIL_PGMPOOL_IDX)
2730 {
2731 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2732 idx = pPage->iModifiedNext;
2733 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2734 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2735 pPage->cModifications = 0;
2736 Assert(++cPages);
2737 }
2738 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2739 pPool->cModifiedPages = 0;
2740 pgmUnlock(pVM);
2741}
2742
2743
2744/**
2745 * Handle SyncCR3 pool tasks
2746 *
2747 * @returns VBox status code.
2748 * @retval VINF_SUCCESS on success.
2749 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2750 * @param pVCpu The cross context virtual CPU structure.
2751 * @remark Should only be used when monitoring is available, thus placed in
2752 * the PGMPOOL_WITH_MONITORING \#ifdef.
2753 */
2754int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2755{
2756 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2757 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2758
2759 /*
2760 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2761 * Occasionally we will have to clear all the shadow page tables because we wanted
2762     * to monitor a page which was mapped by too many shadowed page tables. This operation
2763     * is sometimes referred to as a 'lightweight flush'.
2764 */
2765# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2766 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2767 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2768# else /* !IN_RING3 */
2769 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2770 {
2771 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2772 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2773
2774 /* Make sure all other VCPUs return to ring 3. */
2775 if (pVM->cCpus > 1)
2776 {
2777 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2778 PGM_INVL_ALL_VCPU_TLBS(pVM);
2779 }
2780 return VINF_PGM_SYNC_CR3;
2781 }
2782# endif /* !IN_RING3 */
2783 else
2784 {
2785 pgmPoolMonitorModifiedClearAll(pVM);
2786
2787 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2788 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2789 {
2790 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2791 return pgmPoolSyncCR3(pVCpu);
2792 }
2793 }
2794 return VINF_SUCCESS;
2795}
2796
2797
2798/**
2799 * Frees up at least one user entry.
2800 *
2801 * @returns VBox status code.
2802 * @retval VINF_SUCCESS if successfully freed.
2803 *
2804 * @param pPool The pool.
2805 * @param iUser The user index.
2806 * @param pszTmpCaller Temporary OS X debugging.
2807 */
2808static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser, const char *pszTmpCaller)
2809{
2810 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2811 /*
2812 * Just free cached pages in a braindead fashion.
2813 */
2814 /** @todo walk the age list backwards and free the first with usage. */
2815 int rc = VINF_SUCCESS;
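    /* Keep flushing cached pages until at least one user record is returned to the free list. */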
2816 do
2817 {
2818 int rc2 = pgmPoolCacheFreeOne(pPool, iUser, pszTmpCaller);
2819 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2820 rc = rc2;
2821 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2822 return rc;
2823}
2824
2825
2826/**
2827 * Inserts a page into the cache.
2828 *
2829 * This will create user node for the page, insert it into the GCPhys
2830 * hash, and insert it into the age list.
2831 *
2832 * @returns VBox status code.
2833 * @retval VINF_SUCCESS if successfully added.
2834 *
2835 * @param pPool The pool.
2836 * @param pPage The cached page.
2837 * @param GCPhys The GC physical address of the page we're gonna shadow.
2838 * @param iUser The user index.
2839 * @param iUserTable The user table index.
2840 */
2841DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2842{
2843 int rc = VINF_SUCCESS;
2844 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2845
2846 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2847
2848 if (iUser != NIL_PGMPOOL_IDX)
2849 {
2850#ifdef VBOX_STRICT
2851 /*
2852         * Check that the entry doesn't already exist.
2853 */
2854 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2855 {
2856 uint16_t i = pPage->iUserHead;
2857 do
2858 {
2859 Assert(i < pPool->cMaxUsers);
2860 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2861 i = paUsers[i].iNext;
2862 } while (i != NIL_PGMPOOL_USER_INDEX);
2863 }
2864#endif
2865
2866 /*
2867         * Find a free user node.
2868 */
2869 uint16_t i = pPool->iUserFreeHead;
2870 if (i == NIL_PGMPOOL_USER_INDEX)
2871 {
2872 rc = pgmPoolTrackFreeOneUser(pPool, iUser, __FUNCTION__);
2873 if (RT_FAILURE(rc))
2874 return rc;
2875 i = pPool->iUserFreeHead;
2876 }
2877
2878 /*
2879 * Unlink the user node from the free list,
2880 * initialize and insert it into the user list.
2881 */
2882 pPool->iUserFreeHead = paUsers[i].iNext;
2883 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2884 paUsers[i].iUser = iUser;
2885 paUsers[i].iUserTable = iUserTable;
2886 pPage->iUserHead = i;
2887 }
2888 else
2889 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2890
2891
2892 /*
2893 * Insert into cache and enable monitoring of the guest page if enabled.
2894 *
2895 * Until we implement caching of all levels, including the CR3 one, we'll
2896     * have to make sure we don't try to monitor & cache any recursive reuse of
2897     * a monitored CR3 page. Because all Windows versions are doing this we'll
2898 * have to be able to do combined access monitoring, CR3 + PT and
2899 * PD + PT (guest PAE).
2900 *
2901 * Update:
2902     * We're now cooperating with the CR3 monitor if an uncacheable page is found.
2903 */
2904 const bool fCanBeMonitored = true;
2905 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2906 if (fCanBeMonitored)
2907 {
2908 rc = pgmPoolMonitorInsert(pPool, pPage);
2909 AssertRC(rc);
2910 }
2911 return rc;
2912}
2913
2914
2915/**
2916 * Adds a user reference to a page.
2917 *
2918 * This will move the page to the head of the age list.
2919 *
2920 * @returns VBox status code.
2921 * @retval VINF_SUCCESS if successfully added.
2922 *
2923 * @param pPool The pool.
2924 * @param pPage The cached page.
2925 * @param iUser The user index.
2926 * @param iUserTable The user table.
2927 */
2928static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2929{
2930 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2931 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2932 Assert(iUser != NIL_PGMPOOL_IDX);
2933
2934# ifdef VBOX_STRICT
2935 /*
2936     * Check that the entry doesn't already exist. We only allow multiple
2937 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2938 */
2939 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2940 {
2941 uint16_t i = pPage->iUserHead;
2942 do
2943 {
2944 Assert(i < pPool->cMaxUsers);
2945 /** @todo this assertion looks odd... Shouldn't it be && here? */
2946 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2947 i = paUsers[i].iNext;
2948 } while (i != NIL_PGMPOOL_USER_INDEX);
2949 }
2950# endif
2951
2952 /*
2953 * Allocate a user node.
2954 */
2955 uint16_t i = pPool->iUserFreeHead;
2956 if (i == NIL_PGMPOOL_USER_INDEX)
2957 {
2958 int rc = pgmPoolTrackFreeOneUser(pPool, iUser, __FUNCTION__);
2959 if (RT_FAILURE(rc))
2960 return rc;
2961 i = pPool->iUserFreeHead;
2962 }
2963 pPool->iUserFreeHead = paUsers[i].iNext;
2964
2965 /*
2966 * Initialize the user node and insert it.
2967 */
2968 paUsers[i].iNext = pPage->iUserHead;
2969 paUsers[i].iUser = iUser;
2970 paUsers[i].iUserTable = iUserTable;
2971 pPage->iUserHead = i;
2972
2973# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2974 if (pPage->fDirty)
2975 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2976# endif
2977
2978 /*
2979 * Tell the cache to update its replacement stats for this page.
2980 */
2981 pgmPoolCacheUsed(pPool, pPage);
2982 return VINF_SUCCESS;
2983}
2984
2985
2986/**
2987 * Frees a user record associated with a page.
2988 *
2989 * This does not clear the entry in the user table, it simply returns the
2990 * user record to the chain of free records.
2991 *
2992 * @param pPool The pool.
2993 * @param pPage The shadow page.
2994 * @param iUser The shadow page pool index of the user table.
2995 * @param iUserTable The index into the user table (shadowed).
2996 *
2997 * @remarks Don't call this for root pages.
2998 */
2999static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3000{
3001 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3002 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3003 Assert(iUser != NIL_PGMPOOL_IDX);
3004
3005 /*
3006 * Unlink and free the specified user entry.
3007 */
3008
3009 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3010 uint16_t i = pPage->iUserHead;
3011 if ( i != NIL_PGMPOOL_USER_INDEX
3012 && paUsers[i].iUser == iUser
3013 && paUsers[i].iUserTable == iUserTable)
3014 {
3015 pPage->iUserHead = paUsers[i].iNext;
3016
3017 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3018 paUsers[i].iNext = pPool->iUserFreeHead;
3019 pPool->iUserFreeHead = i;
3020 return;
3021 }
3022
3023 /* General: Linear search. */
3024 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3025 while (i != NIL_PGMPOOL_USER_INDEX)
3026 {
3027 if ( paUsers[i].iUser == iUser
3028 && paUsers[i].iUserTable == iUserTable)
3029 {
3030 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3031 paUsers[iPrev].iNext = paUsers[i].iNext;
3032 else
3033 pPage->iUserHead = paUsers[i].iNext;
3034
3035 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3036 paUsers[i].iNext = pPool->iUserFreeHead;
3037 pPool->iUserFreeHead = i;
3038 return;
3039 }
3040 iPrev = i;
3041 i = paUsers[i].iNext;
3042 }
3043
3044 /* Fatal: didn't find it */
3045 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3046 iUser, iUserTable, pPage->GCPhys));
3047}
3048
3049
3050#if 0 /* unused */
3051/**
3052 * Gets the entry size of a shadow table.
3053 *
3054 * @param enmKind The kind of page.
3055 *
3056 * @returns The size of the entry in bytes. That is, 4 or 8.
3057 * @returns If the kind is not for a table, an assertion is raised and 0 is
3058 * returned.
3059 */
3060DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3061{
3062 switch (enmKind)
3063 {
3064 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3065 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3066 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3067 case PGMPOOLKIND_32BIT_PD:
3068 case PGMPOOLKIND_32BIT_PD_PHYS:
3069 return 4;
3070
3071 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3072 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3073 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3074 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3075 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3076 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3077 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3078 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3079 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3080 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3081 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3082 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3083 case PGMPOOLKIND_64BIT_PML4:
3084 case PGMPOOLKIND_PAE_PDPT:
3085 case PGMPOOLKIND_ROOT_NESTED:
3086 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3087 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3088 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3089 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3090 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3091 case PGMPOOLKIND_PAE_PD_PHYS:
3092 case PGMPOOLKIND_PAE_PDPT_PHYS:
3093 return 8;
3094
3095 default:
3096 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3097 }
3098}
3099#endif /* unused */
3100
3101#if 0 /* unused */
3102/**
3103 * Gets the entry size of a guest table.
3104 *
3105 * @param enmKind The kind of page.
3106 *
3107 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3108 * @returns If the kind is not for a table, an assertion is raised and 0 is
3109 * returned.
3110 */
3111DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3112{
3113 switch (enmKind)
3114 {
3115 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3116 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3117 case PGMPOOLKIND_32BIT_PD:
3118 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3119 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3120 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3121 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3122 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3123 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3124 return 4;
3125
3126 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3127 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3128 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3129 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3130 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3131 case PGMPOOLKIND_64BIT_PML4:
3132 case PGMPOOLKIND_PAE_PDPT:
3133 return 8;
3134
3135 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3136 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3137 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3138 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3139 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3140 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3141 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3142 case PGMPOOLKIND_ROOT_NESTED:
3143 case PGMPOOLKIND_PAE_PD_PHYS:
3144 case PGMPOOLKIND_PAE_PDPT_PHYS:
3145 case PGMPOOLKIND_32BIT_PD_PHYS:
3146 /** @todo can we return 0? (nobody is calling this...) */
3147 AssertFailed();
3148 return 0;
3149
3150 default:
3151 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3152 }
3153}
3154#endif /* unused */
3155
3156
3157/**
3158 * Checks one shadow page table entry for a mapping of a physical page.
3159 *
3160 * @returns true / false indicating removal of all relevant PTEs
3161 *
3162 * @param pVM The cross context VM structure.
3163 * @param pPhysPage The guest page in question.
3164 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3165 * @param iShw The pool index of the shadow page table.
3166 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3167 */
3168static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3169{
3170 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3171 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3172 bool fRet = false;
3173
3174 /*
3175 * Assert sanity.
3176 */
3177 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3178 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3179 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3180
3181 /*
3182 * Then, clear the actual mappings to the page in the shadow PT.
3183 */
3184 switch (pPage->enmKind)
3185 {
3186 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3187 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3188 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3189 {
3190 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3191 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3192 uint32_t u32AndMask = 0;
3193 uint32_t u32OrMask = 0;
3194
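            /* Not forced to flush: decide from the handler state whether the entry can be kept writable (no/disabled handler) or must have its RW bit cleared (write handler). */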
3195 if (!fFlushPTEs)
3196 {
3197 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3198 {
3199 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3200 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3201 u32OrMask = X86_PTE_RW;
3202 u32AndMask = UINT32_MAX;
3203 fRet = true;
3204 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3205 break;
3206
3207 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3208 u32OrMask = 0;
3209 u32AndMask = ~X86_PTE_RW;
3210 fRet = true;
3211 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3212 break;
3213 default:
3214 /* (shouldn't be here, will assert below) */
3215 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3216 break;
3217 }
3218 }
3219 else
3220 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
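                /* Taken together, the masks set up above encode the action applied to the shadow
                   PTE further down: (UINT32_MAX, X86_PTE_RW) restores write access, (~X86_PTE_RW, 0)
                   revokes write access, and (0, 0) clears the entry altogether. */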
3221
3222 /* Update the counter if we're removing references. */
3223 if (!u32AndMask)
3224 {
3225 Assert(pPage->cPresent);
3226 Assert(pPool->cPresent);
3227 pPage->cPresent--;
3228 pPool->cPresent--;
3229 }
3230
3231 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3232 {
3233 X86PTE Pte;
3234
3235 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3236 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3237 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3238 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3239
3240 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3241 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3242 return fRet;
3243 }
3244#ifdef LOG_ENABLED
3245 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3246 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3247 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3248 {
3249 Log(("i=%d cFound=%d\n", i, ++cFound));
3250 }
3251#endif
3252 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3253 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3254 break;
3255 }
3256
3257 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3258 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3259 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3260 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3261 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3262 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3263 {
3264 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3265 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3266 uint64_t u64OrMask = 0;
3267 uint64_t u64AndMask = 0;
3268
3269 if (!fFlushPTEs)
3270 {
3271 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3272 {
3273 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3274 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3275 u64OrMask = X86_PTE_RW;
3276 u64AndMask = UINT64_MAX;
3277 fRet = true;
3278 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3279 break;
3280
3281 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3282 u64OrMask = 0;
3283 u64AndMask = ~(uint64_t)X86_PTE_RW;
3284 fRet = true;
3285 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3286 break;
3287
3288 default:
3289 /* (shouldn't be here, will assert below) */
3290 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3291 break;
3292 }
3293 }
3294 else
3295 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
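                /* Same action encoding as in the 32-bit case above, just with 64-bit masks. */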
3296
3297 /* Update the counter if we're removing references. */
3298 if (!u64AndMask)
3299 {
3300 Assert(pPage->cPresent);
3301 Assert(pPool->cPresent);
3302 pPage->cPresent--;
3303 pPool->cPresent--;
3304 }
3305
3306 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3307 {
3308 X86PTEPAE Pte;
3309
3310 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3311 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3312 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3313 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3314
3315 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3316 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3317 return fRet;
3318 }
3319#ifdef LOG_ENABLED
3320 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3321 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3322 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3323 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3324 Log(("i=%d cFound=%d\n", i, ++cFound));
3325#endif
3326 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3327 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3328 break;
3329 }
3330
3331#ifdef PGM_WITH_LARGE_PAGES
3332 /* Large page case only. */
3333 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3334 {
3335 Assert(pVM->pgm.s.fNestedPaging);
3336
3337 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3338 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3339
3340 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3341 {
3342 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3343 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3344 pPD->a[iPte].u = 0;
3345 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3346
3347 /* Update the counter as we're removing references. */
3348 Assert(pPage->cPresent);
3349 Assert(pPool->cPresent);
3350 pPage->cPresent--;
3351 pPool->cPresent--;
3352
3353 return fRet;
3354 }
3355# ifdef LOG_ENABLED
3356 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3357 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3358 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3359 Log(("i=%d cFound=%d\n", i, ++cFound));
3360# endif
3361 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3362 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3363 break;
3364 }
3365
3366 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3367 case PGMPOOLKIND_PAE_PD_PHYS:
3368 {
3369 Assert(pVM->pgm.s.fNestedPaging);
3370
3371 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3372 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3373
3374 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3375 {
3376 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3377 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3378 pPD->a[iPte].u = 0;
3379 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3380
3381 /* Update the counter as we're removing references. */
3382 Assert(pPage->cPresent);
3383 Assert(pPool->cPresent);
3384 pPage->cPresent--;
3385 pPool->cPresent--;
3386 return fRet;
3387 }
3388# ifdef LOG_ENABLED
3389 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3390 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3391 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3392 Log(("i=%d cFound=%d\n", i, ++cFound));
3393# endif
3394 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3395 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3396 break;
3397 }
3398#endif /* PGM_WITH_LARGE_PAGES */
3399
3400 default:
3401 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3402 }
3403
3404 /* not reached. */
3405#ifndef _MSC_VER
3406 return fRet;
3407#endif
3408}
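/* A false return means the examined shadow PTE was cleared rather than kept; the callers
   below use that to decide whether the tracking data of the PGMPAGE entry can be reset. */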
3409
3410
3411/**
3412 * Scans one shadow page table for mappings of a physical page.
3413 *
3414 * @param pVM The cross context VM structure.
3415 * @param pPhysPage The guest page in question.
3416 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3417 * @param iShw The shadow page table index.
3418 */
3419static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3420{
3421 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3422
3423    /* We should only come here when there's only one reference to this physical page. */
3424 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3425
3426 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3427 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3428 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3429 if (!fKeptPTEs)
3430 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3431 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3432}
3433
3434
3435/**
3436 * Flushes a list of shadow page tables mapping the same physical page.
3437 *
3438 * @param pVM The cross context VM structure.
3439 * @param pPhysPage The guest page in question.
3440 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3441 * @param iPhysExt The physical cross reference extent list to flush.
3442 */
3443static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3444{
3445 PGM_LOCK_ASSERT_OWNER(pVM);
3446 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3447 bool fKeepList = false;
3448
3449 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3450 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3451
3452 const uint16_t iPhysExtStart = iPhysExt;
3453 PPGMPOOLPHYSEXT pPhysExt;
3454 do
3455 {
3456 Assert(iPhysExt < pPool->cMaxPhysExts);
3457 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3458 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3459 {
3460 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3461 {
3462 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3463 if (!fKeptPTEs)
3464 {
3465 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3466 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3467 }
3468 else
3469 fKeepList = true;
3470 }
3471 }
3472 /* next */
3473 iPhysExt = pPhysExt->iNext;
3474 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3475
3476 if (!fKeepList)
3477 {
3478 /* insert the list into the free list and clear the ram range entry. */
3479 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3480 pPool->iPhysExtFreeHead = iPhysExtStart;
3481 /* Invalidate the tracking data. */
3482 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3483 }
3484
3485 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3486}
3487
3488
3489/**
3490 * Flushes all shadow page table mappings of the given guest page.
3491 *
3492 * This is typically called when the host page backing the guest one has been
3493 * replaced or when the page protection was changed due to a guest access
3494 * caught by the monitoring.
3495 *
3496 * @returns VBox status code.
3497 * @retval VINF_SUCCESS if all references have been successfully cleared.
3498 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3499 * pool cleaning. FF and sync flags are set.
3500 *
3501 * @param pVM The cross context VM structure.
3502 * @param GCPhysPage GC physical address of the page in question
3503 * @param pPhysPage The guest page in question.
3504 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3505 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3506 * flushed, it is NOT touched if this isn't necessary.
3507 *                      The caller MUST initialize this to @a false.
3508 */
3509int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3510{
3511 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3512 pgmLock(pVM);
3513 int rc = VINF_SUCCESS;
3514
3515#ifdef PGM_WITH_LARGE_PAGES
3516 /* Is this page part of a large page? */
3517 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3518 {
3519 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3520 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3521
3522 /* Fetch the large page base. */
3523 PPGMPAGE pLargePage;
3524 if (GCPhysBase != GCPhysPage)
3525 {
3526 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3527 AssertFatal(pLargePage);
3528 }
3529 else
3530 pLargePage = pPhysPage;
3531
3532 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3533
3534 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3535 {
3536 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3537 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3538 pVM->pgm.s.cLargePagesDisabled++;
3539
3540 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3541 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3542
3543 *pfFlushTLBs = true;
3544 pgmUnlock(pVM);
3545 return rc;
3546 }
3547 }
3548#else
3549 NOREF(GCPhysPage);
3550#endif /* PGM_WITH_LARGE_PAGES */
3551
3552 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
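    /* The 16-bit tracking word packs a reference count and an index: a count of one means the
       index is the single shadow page table referencing this page, PGMPOOL_TD_CREFS_PHYSEXT
       means the index refers to a chain of PGMPOOLPHYSEXT records, and the special
       PGMPOOL_TD_IDX_OVERFLOWED value forces the slow scan of the whole pool. */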
3553 if (u16)
3554 {
3555 /*
3556 * The zero page is currently screwing up the tracking and we'll
3557 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3558 * is defined, zero pages won't normally be mapped. Some kind of solution
3559 * will be needed for this problem of course, but it will have to wait...
3560 */
3561 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3562 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3563 rc = VINF_PGM_GCPHYS_ALIASED;
3564 else
3565 {
3566# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 /** @todo we can drop this now. */
3567 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3568 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3569 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3570# endif
3571
3572 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3573 {
3574 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3575 pgmPoolTrackFlushGCPhysPT(pVM,
3576 pPhysPage,
3577 fFlushPTEs,
3578 PGMPOOL_TD_GET_IDX(u16));
3579 }
3580 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3581 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3582 else
3583 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3584 *pfFlushTLBs = true;
3585
3586# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3587 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3588# endif
3589 }
3590 }
3591
3592 if (rc == VINF_PGM_GCPHYS_ALIASED)
3593 {
3594 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3595 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3596 rc = VINF_PGM_SYNC_CR3;
3597 }
3598 pgmUnlock(pVM);
3599 return rc;
3600}
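/* Illustrative caller sketch (not lifted from an actual caller; only the function itself and
   its status codes are real):

       bool fFlushTLBs = false;    the caller MUST initialize this to false
       int  rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysPage, pPhysPage, true, &fFlushTLBs);
       if (fFlushTLBs)
           ... flush the shadow TLBs of all VCPUs ...
       if (rc == VINF_PGM_SYNC_CR3)
           ... a CR3 sync and pool clearing have already been scheduled via the FF / sync flags ...
*/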
3601
3602
3603/**
3604 * Scans all shadow page tables for mappings of a physical page.
3605 *
3606 * This may be slow, but it's most likely more efficient than cleaning
3607 * out the entire page pool / cache.
3608 *
3609 * @returns VBox status code.
3610 * @retval VINF_SUCCESS if all references have been successfully cleared.
3611 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3612 * a page pool cleaning.
3613 *
3614 * @param pVM The cross context VM structure.
3615 * @param pPhysPage The guest page in question.
3616 */
3617int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3618{
3619 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3620 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3621 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3622 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3623
3624 /*
3625 * There is a limit to what makes sense.
3626 */
3627 if ( pPool->cPresent > 1024
3628 && pVM->cCpus == 1)
3629 {
3630 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3631 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3632 return VINF_PGM_GCPHYS_ALIASED;
3633 }
3634
3635 /*
3636     * Iterate all the pages until we've encountered all that are in use.
3637     * This is a simple but not quite optimal solution.
3638 */
3639 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3640 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3641 unsigned cLeft = pPool->cUsedPages;
3642 unsigned iPage = pPool->cCurPages;
3643 while (--iPage >= PGMPOOL_IDX_FIRST)
3644 {
3645 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3646 if ( pPage->GCPhys != NIL_RTGCPHYS
3647 && pPage->cPresent)
3648 {
3649 switch (pPage->enmKind)
3650 {
3651 /*
3652 * We only care about shadow page tables.
3653 */
3654 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3655 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3656 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3657 {
3658 unsigned cPresent = pPage->cPresent;
3659 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3660 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3661 if (pPT->a[i].n.u1Present)
3662 {
3663 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3664 {
3665 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3666 pPT->a[i].u = 0;
3667
3668 /* Update the counter as we're removing references. */
3669 Assert(pPage->cPresent);
3670 Assert(pPool->cPresent);
3671 pPage->cPresent--;
3672 pPool->cPresent--;
3673 }
3674 if (!--cPresent)
3675 break;
3676 }
3677 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3678 break;
3679 }
3680
3681 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3682 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3683 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3684 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3685 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3686 {
3687 unsigned cPresent = pPage->cPresent;
3688 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3689 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3690 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3691 {
3692 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3693 {
3694 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3695 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3696
3697 /* Update the counter as we're removing references. */
3698 Assert(pPage->cPresent);
3699 Assert(pPool->cPresent);
3700 pPage->cPresent--;
3701 pPool->cPresent--;
3702 }
3703 if (!--cPresent)
3704 break;
3705 }
3706 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3707 break;
3708 }
3709
3710 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3711 {
3712 unsigned cPresent = pPage->cPresent;
3713 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3714 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3715 if (pPT->a[i].n.u1Present)
3716 {
3717 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3718 {
3719 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3720 pPT->a[i].u = 0;
3721
3722 /* Update the counter as we're removing references. */
3723 Assert(pPage->cPresent);
3724 Assert(pPool->cPresent);
3725 pPage->cPresent--;
3726 pPool->cPresent--;
3727 }
3728 if (!--cPresent)
3729 break;
3730 }
3731 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3732 break;
3733 }
3734 }
3735
3736 if (!--cLeft)
3737 break;
3738 }
3739 }
3740
3741 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3742 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3743
3744 /*
3745 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3746 */
3747 if (pPool->cPresent > 1024)
3748 {
3749 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3750 return VINF_PGM_GCPHYS_ALIASED;
3751 }
3752
3753 return VINF_SUCCESS;
3754}
3755
3756
3757/**
3758 * Clears the user entry in a user table.
3759 *
3760 * This is used to remove all references to a page when flushing it.
3761 */
3762static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3763{
3764 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3765 Assert(pUser->iUser < pPool->cCurPages);
3766 uint32_t iUserTable = pUser->iUserTable;
3767
3768 /*
3769 * Map the user page. Ignore references made by fictitious pages.
3770 */
3771 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3772 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3773 union
3774 {
3775 uint64_t *pau64;
3776 uint32_t *pau32;
3777 } u;
3778 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3779 {
3780 Assert(!pUserPage->pvPageR3);
3781 return;
3782 }
3783 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3784
3785
3786 /* Safety precaution in case we change the paging for other modes too in the future. */
3787 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3788
3789#ifdef VBOX_STRICT
3790 /*
3791 * Some sanity checks.
3792 */
3793 switch (pUserPage->enmKind)
3794 {
3795 case PGMPOOLKIND_32BIT_PD:
3796 case PGMPOOLKIND_32BIT_PD_PHYS:
3797 Assert(iUserTable < X86_PG_ENTRIES);
3798 break;
3799 case PGMPOOLKIND_PAE_PDPT:
3800 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3801 case PGMPOOLKIND_PAE_PDPT_PHYS:
3802 Assert(iUserTable < 4);
3803 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3804 break;
3805 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3806 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3807 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3808 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3809 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3810 case PGMPOOLKIND_PAE_PD_PHYS:
3811 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3812 break;
3813 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3814 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3815 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3816 break;
3817 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3818 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3819 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3820 break;
3821 case PGMPOOLKIND_64BIT_PML4:
3822 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3823 /* GCPhys >> PAGE_SHIFT is the index here */
3824 break;
3825 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3826 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3827 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3828 break;
3829
3830 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3831 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3832 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3833 break;
3834
3835 case PGMPOOLKIND_ROOT_NESTED:
3836 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3837 break;
3838
3839 default:
3840 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3841 break;
3842 }
3843#endif /* VBOX_STRICT */
3844
3845 /*
3846 * Clear the entry in the user page.
3847 */
3848 switch (pUserPage->enmKind)
3849 {
3850 /* 32-bit entries */
3851 case PGMPOOLKIND_32BIT_PD:
3852 case PGMPOOLKIND_32BIT_PD_PHYS:
3853 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3854 break;
3855
3856 /* 64-bit entries */
3857 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3858 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3859 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3860 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3861 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3862 case PGMPOOLKIND_PAE_PD_PHYS:
3863 case PGMPOOLKIND_PAE_PDPT_PHYS:
3864 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3865 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3866 case PGMPOOLKIND_64BIT_PML4:
3867 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3868 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3869 case PGMPOOLKIND_PAE_PDPT:
3870 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3871 case PGMPOOLKIND_ROOT_NESTED:
3872 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3873 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3874 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3875 break;
3876
3877 default:
3878 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3879 }
3880 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3881}
3882
3883
3884/**
3885 * Clears all users of a page.
3886 */
3887static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3888{
3889 /*
3890 * Free all the user records.
3891 */
3892 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3893
3894 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3895 uint16_t i = pPage->iUserHead;
3896 while (i != NIL_PGMPOOL_USER_INDEX)
3897 {
3898        /* Clear the entry in the user table. */
3899 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3900
3901 /* Free it. */
3902 const uint16_t iNext = paUsers[i].iNext;
3903 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3904 paUsers[i].iNext = pPool->iUserFreeHead;
3905 pPool->iUserFreeHead = i;
3906
3907 /* Next. */
3908 i = iNext;
3909 }
3910 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3911}
3912
3913
3914/**
3915 * Allocates a new physical cross reference extent.
3916 *
3917 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3918 * @param pVM The cross context VM structure.
3919 * @param piPhysExt Where to store the phys ext index.
3920 */
3921PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3922{
3923 PGM_LOCK_ASSERT_OWNER(pVM);
3924 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3925 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3926 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3927 {
3928 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3929 return NULL;
3930 }
3931 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3932 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3933 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3934 *piPhysExt = iPhysExt;
3935 return pPhysExt;
3936}
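/* Sketch of how a freshly allocated extent is typically filled in (compare
   pgmPoolTrackPhysExtAddref below); all names are the ones used in this file:

       uint16_t        iPhysExt;
       PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
       if (pPhysExt)
       {
           pPhysExt->aidx[0] = iShwPT;     shadow page table index
           pPhysExt->apte[0] = iPte;       page table entry index
       }
       else
           ... fall back to the PGMPOOL_TD_IDX_OVERFLOWED tracking state ...

   Allocation and the later pgmPoolTrackPhysExtFree/FreeList calls all expect the PGM lock
   to be held (PGM_LOCK_ASSERT_OWNER). */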
3937
3938
3939/**
3940 * Frees a physical cross reference extent.
3941 *
3942 * @param pVM The cross context VM structure.
3943 * @param iPhysExt The extent to free.
3944 */
3945void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3946{
3947 PGM_LOCK_ASSERT_OWNER(pVM);
3948 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3949 Assert(iPhysExt < pPool->cMaxPhysExts);
3950 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3951 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3952 {
3953 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3954 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3955 }
3956 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3957 pPool->iPhysExtFreeHead = iPhysExt;
3958}
3959
3960
3961/**
3962 * Frees a list of physical cross reference extents.
3963 *
3964 * @param pVM The cross context VM structure.
3965 * @param iPhysExt The first extent of the list to free.
3966 */
3967void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3968{
3969 PGM_LOCK_ASSERT_OWNER(pVM);
3970 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3971
3972 const uint16_t iPhysExtStart = iPhysExt;
3973 PPGMPOOLPHYSEXT pPhysExt;
3974 do
3975 {
3976 Assert(iPhysExt < pPool->cMaxPhysExts);
3977 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3978 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3979 {
3980 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3981 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3982 }
3983
3984 /* next */
3985 iPhysExt = pPhysExt->iNext;
3986 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3987
3988 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3989 pPool->iPhysExtFreeHead = iPhysExtStart;
3990}
3991
3992
3993/**
3994 * Insert a reference into a list of physical cross reference extents.
3995 *
3996 * @returns The new tracking data for PGMPAGE.
3997 *
3998 * @param pVM The cross context VM structure.
3999 * @param iPhysExt The physical extent index of the list head.
4000 * @param iShwPT The shadow page table index.
4001 * @param iPte Page table entry
4002 *
4003 */
4004static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4005{
4006 PGM_LOCK_ASSERT_OWNER(pVM);
4007 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4008 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4009
4010 /*
4011 * Special common cases.
4012 */
4013 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4014 {
4015 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4016 paPhysExts[iPhysExt].apte[1] = iPte;
4017 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4018 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4019 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4020 }
4021 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4022 {
4023 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4024 paPhysExts[iPhysExt].apte[2] = iPte;
4025 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4026 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4027 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4028 }
4029 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4030
4031 /*
4032 * General treatment.
4033 */
4034 const uint16_t iPhysExtStart = iPhysExt;
4035 unsigned cMax = 15;
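    /* At most 15 chained extents (three (shadow page, PTE) slots each) are examined before
       giving up and degrading the tracking to PGMPOOL_TD_IDX_OVERFLOWED below. */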
4036 for (;;)
4037 {
4038 Assert(iPhysExt < pPool->cMaxPhysExts);
4039 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4040 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4041 {
4042 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4043 paPhysExts[iPhysExt].apte[i] = iPte;
4044 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4045 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4046 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4047 }
4048 if (!--cMax)
4049 {
4050 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4051 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4052 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4053 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4054 }
4055
4056 /* advance */
4057 iPhysExt = paPhysExts[iPhysExt].iNext;
4058 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4059 break;
4060 }
4061
4062 /*
4063 * Add another extent to the list.
4064 */
4065 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4066 if (!pNew)
4067 {
4068 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4069 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4070 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4071 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4072 }
4073 pNew->iNext = iPhysExtStart;
4074 pNew->aidx[0] = iShwPT;
4075 pNew->apte[0] = iPte;
4076 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4077 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4078}
4079
4080
4081/**
4082 * Adds a reference to a guest physical page where extents are in use.
4083 *
4084 * @returns The new tracking data for PGMPAGE.
4085 *
4086 * @param pVM The cross context VM structure.
4087 * @param pPhysPage Pointer to the aPages entry in the ram range.
4088 * @param u16 The ram range flags (top 16-bits).
4089 * @param iShwPT The shadow page table index.
4090 * @param iPte Page table entry
4091 */
4092uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4093{
4094 pgmLock(pVM);
4095 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4096 {
4097 /*
4098 * Convert to extent list.
4099 */
4100 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4101 uint16_t iPhysExt;
4102 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4103 if (pPhysExt)
4104 {
4105 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4106 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4107 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4108 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4109 pPhysExt->aidx[1] = iShwPT;
4110 pPhysExt->apte[1] = iPte;
4111 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4112 }
4113 else
4114 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4115 }
4116 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4117 {
4118 /*
4119 * Insert into the extent list.
4120 */
4121 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4122 }
4123 else
4124 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4125 pgmUnlock(pVM);
4126 return u16;
4127}
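/* In short: a page with a plain reference count of one is converted to an extent list on the
   second reference, later references are inserted into that list, and once the tracking has
   degraded to PGMPOOL_TD_IDX_OVERFLOWED only the statistics counter is bumped. */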
4128
4129
4130/**
4131 * Clear references to guest physical memory.
4132 *
4133 * @param pPool The pool.
4134 * @param pPage The page.
4135 * @param pPhysPage Pointer to the aPages entry in the ram range.
4136 * @param iPte Shadow PTE index
4137 */
4138void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4139{
4140 PVMCC pVM = pPool->CTX_SUFF(pVM);
4141 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4142 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4143
4144 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4145 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4146 {
4147 pgmLock(pVM);
4148
4149 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4150 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4151 do
4152 {
4153 Assert(iPhysExt < pPool->cMaxPhysExts);
4154
4155 /*
4156 * Look for the shadow page and check if it's all freed.
4157 */
4158 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4159 {
4160 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4161 && paPhysExts[iPhysExt].apte[i] == iPte)
4162 {
4163 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4164 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4165
4166 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4167 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4168 {
4169 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4170 pgmUnlock(pVM);
4171 return;
4172 }
4173
4174 /* we can free the node. */
4175 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4176 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4177 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4178 {
4179 /* lonely node */
4180 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4181 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4182 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4183 }
4184 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4185 {
4186 /* head */
4187 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4188 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4189 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4190 }
4191 else
4192 {
4193 /* in list */
4194 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4195 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4196 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4197 }
4198 iPhysExt = iPhysExtNext;
4199 pgmUnlock(pVM);
4200 return;
4201 }
4202 }
4203
4204 /* next */
4205 iPhysExtPrev = iPhysExt;
4206 iPhysExt = paPhysExts[iPhysExt].iNext;
4207 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4208
4209 pgmUnlock(pVM);
4210 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4211 }
4212 else /* nothing to do */
4213 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4214}
4215
4216/**
4217 * Clear references to guest physical memory.
4218 *
4219 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4220 * physical address is assumed to be correct, so the linear search can be
4221 * skipped and we can assert at an earlier point.
4222 *
4223 * @param pPool The pool.
4224 * @param pPage The page.
4225 * @param HCPhys The host physical address corresponding to the guest page.
4226 * @param GCPhys The guest physical address corresponding to HCPhys.
4227 * @param iPte Shadow PTE index
4228 */
4229static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4230{
4231 /*
4232 * Lookup the page and check if it checks out before derefing it.
4233 */
4234 PVMCC pVM = pPool->CTX_SUFF(pVM);
4235 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4236 if (pPhysPage)
4237 {
4238 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4239#ifdef LOG_ENABLED
4240 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4241 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4242#endif
4243 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4244 {
4245 Assert(pPage->cPresent);
4246 Assert(pPool->cPresent);
4247 pPage->cPresent--;
4248 pPool->cPresent--;
4249 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4250 return;
4251 }
4252
4253 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4254 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4255 }
4256 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4257}
4258
4259
4260/**
4261 * Clear references to guest physical memory.
4262 *
4263 * @param pPool The pool.
4264 * @param pPage The page.
4265 * @param HCPhys The host physical address corresponding to the guest page.
4266 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4267 * @param iPte Shadow PTE index
4268 */
4269void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4270{
4271 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4272
4273 /*
4274 * Try the hint first.
4275 */
4276 RTHCPHYS HCPhysHinted;
4277 PVMCC pVM = pPool->CTX_SUFF(pVM);
4278 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4279 if (pPhysPage)
4280 {
4281 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4282 Assert(HCPhysHinted);
4283 if (HCPhysHinted == HCPhys)
4284 {
4285 Assert(pPage->cPresent);
4286 Assert(pPool->cPresent);
4287 pPage->cPresent--;
4288 pPool->cPresent--;
4289 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4290 return;
4291 }
4292 }
4293 else
4294 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4295
4296 /*
4297 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4298 */
4299 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4300 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4301 while (pRam)
4302 {
4303 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4304 while (iPage-- > 0)
4305 {
4306 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4307 {
4308 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4309 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4310 Assert(pPage->cPresent);
4311 Assert(pPool->cPresent);
4312 pPage->cPresent--;
4313 pPool->cPresent--;
4314 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4315 return;
4316 }
4317 }
4318 pRam = pRam->CTX_SUFF(pNext);
4319 }
4320
4321 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4322}
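/* Cost note: when the hint is right the deref is a single pgmPhysGetPage lookup; when it is
   wrong the fallback is a linear scan over every RAM range (counted by
   StatTrackLinearRamSearches), so callers should pass the best GCPhysHint they have. */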
4323
4324
4325/**
4326 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4327 *
4328 * @param pPool The pool.
4329 * @param pPage The page.
4330 * @param pShwPT The shadow page table (mapping of the page).
4331 * @param pGstPT The guest page table.
4332 */
4333DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4334{
4335 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
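    /* With the A20 gate disabled, bit 20 of guest physical addresses is forced to zero, so the
       hint taken from the guest PTE is masked the same way to match what was actually shadowed. */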
4336 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4337 {
4338 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4339 if (pShwPT->a[i].n.u1Present)
4340 {
4341 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4342 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4343 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4344 if (!pPage->cPresent)
4345 break;
4346 }
4347 }
4348}
4349
4350
4351/**
4352 * Clear references to guest physical memory in a PAE / 32-bit page table.
4353 *
4354 * @param pPool The pool.
4355 * @param pPage The page.
4356 * @param pShwPT The shadow page table (mapping of the page).
4357 * @param pGstPT The guest page table (just a half one).
4358 */
4359DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4360{
4361 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4362 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4363 {
4364 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4365 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4366 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4367 {
4368 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4369 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4370 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4371 if (!pPage->cPresent)
4372 break;
4373 }
4374 }
4375}
4376
4377
4378/**
4379 * Clear references to guest physical memory in a PAE / PAE page table.
4380 *
4381 * @param pPool The pool.
4382 * @param pPage The page.
4383 * @param pShwPT The shadow page table (mapping of the page).
4384 * @param pGstPT The guest page table.
4385 */
4386DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4387{
4388 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4389 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4390 {
4391 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4392 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4393 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4394 {
4395            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4396 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4397 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4398 if (!pPage->cPresent)
4399 break;
4400 }
4401 }
4402}
4403
4404
4405/**
4406 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4407 *
4408 * @param pPool The pool.
4409 * @param pPage The page.
4410 * @param pShwPT The shadow page table (mapping of the page).
4411 */
4412DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4413{
4414 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4415 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4416 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4417 {
4418 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4419 if (pShwPT->a[i].n.u1Present)
4420 {
4421 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4422 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4423 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4424 if (!pPage->cPresent)
4425 break;
4426 }
4427 }
4428}
4429
4430
4431/**
4432 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4433 *
4434 * @param pPool The pool.
4435 * @param pPage The page.
4436 * @param pShwPT The shadow page table (mapping of the page).
4437 */
4438DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4439{
4440 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4441 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4442 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4443 {
4444 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4445 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4446 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4447 {
4448 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4449 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4450 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4451 if (!pPage->cPresent)
4452 break;
4453 }
4454 }
4455}
4456
4457
4458/**
4459 * Clear references to shadowed pages in an EPT page table.
4460 *
4461 * @param pPool The pool.
4462 * @param pPage The page.
4463 * @param pShwPT The shadow page table (mapping of the page).
4465 */
4466DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4467{
4468 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4469 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4470 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4471 {
4472 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4473 if (pShwPT->a[i].n.u1Present)
4474 {
4475 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4476 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4477 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4478 if (!pPage->cPresent)
4479 break;
4480 }
4481 }
4482}
4483
4484
4485/**
4486 * Clear references to shadowed pages in a 32-bit page directory.
4487 *
4488 * @param pPool The pool.
4489 * @param pPage The page.
4490 * @param pShwPD The shadow page directory (mapping of the page).
4491 */
4492DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4493{
4494 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4495 {
4496 if ( pShwPD->a[i].n.u1Present
4497 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4498 )
4499 {
4500 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4501 if (pSubPage)
4502 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4503 else
4504 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4505 }
4506 }
4507}
4508
4509
4510/**
4511 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4512 *
4513 * @param pPool The pool.
4514 * @param pPage The page.
4515 * @param pShwPD The shadow page directory (mapping of the page).
4516 */
4517DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4518{
4519 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4520 {
4521 if ( pShwPD->a[i].n.u1Present
4522 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4523 {
4524#ifdef PGM_WITH_LARGE_PAGES
4525 if (pShwPD->a[i].b.u1Size)
4526 {
4527 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4528 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4529 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4530 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4531 i);
4532 }
4533 else
4534#endif
4535 {
4536 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4537 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4538 if (pSubPage)
4539 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4540 else
4541 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4542 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4543 }
4544 }
4545 }
4546}
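/* Two deref paths above: 2 MB (large page) entries reference guest physical memory directly and
   are released via pgmPoolTracDerefGCPhys, while regular entries reference another shadow pool
   page that is looked up in the HCPhys AVL tree and released via pgmPoolTrackFreeUser. */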
4547
4548
4549/**
4550 * Clear references to shadowed pages in a PAE page directory pointer table.
4551 *
4552 * @param pPool The pool.
4553 * @param pPage The page.
4554 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4555 */
4556DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4557{
4558 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4559 {
4560 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4561 if ( pShwPDPT->a[i].n.u1Present
4562 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4563 )
4564 {
4565 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4566 if (pSubPage)
4567 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4568 else
4569 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4570 }
4571 }
4572}
4573
4574
4575/**
4576 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4577 *
4578 * @param pPool The pool.
4579 * @param pPage The page.
4580 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4581 */
4582DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4583{
4584 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4585 {
4586 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4587 if (pShwPDPT->a[i].n.u1Present)
4588 {
4589 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4590 if (pSubPage)
4591 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4592 else
4593 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4594 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4595 }
4596 }
4597}
4598
4599
4600/**
4601 * Clear references to shadowed pages in a 64-bit level 4 page table.
4602 *
4603 * @param pPool The pool.
4604 * @param pPage The page.
4605 * @param pShwPML4 The shadow page map level-4 table (mapping of the page).
4606 */
4607DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4608{
4609 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4610 {
4611 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4612 if (pShwPML4->a[i].n.u1Present)
4613 {
4614 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4615 if (pSubPage)
4616 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4617 else
4618 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4619 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4620 }
4621 }
4622}
4623
4624
4625/**
4626 * Clear references to shadowed pages in an EPT page directory.
4627 *
4628 * @param pPool The pool.
4629 * @param pPage The page.
4630 * @param pShwPD The shadow page directory (mapping of the page).
4631 */
4632DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4633{
4634 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4635 {
4636 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4637 if (pShwPD->a[i].n.u1Present)
4638 {
4639#ifdef PGM_WITH_LARGE_PAGES
4640 if (pShwPD->a[i].b.u1Size)
4641 {
4642 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4643 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4644 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4645 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4646 i);
4647 }
4648 else
4649#endif
4650 {
4651 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4652 if (pSubPage)
4653 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4654 else
4655 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4656 }
4657 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4658 }
4659 }
4660}
4661
4662
4663/**
4664 * Clear references to shadowed pages in an EPT page directory pointer table.
4665 *
4666 * @param pPool The pool.
4667 * @param pPage The page.
4668 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4669 */
4670DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4671{
4672 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4673 {
4674 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4675 if (pShwPDPT->a[i].n.u1Present)
4676 {
4677 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4678 if (pSubPage)
4679 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4680 else
4681 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4682 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4683 }
4684 }
4685}
4686
4687
4688/**
4689 * Clears all references made by this page.
4690 *
4691 * This includes other shadow pages and GC physical addresses.
4692 *
4693 * @param pPool The pool.
4694 * @param pPage The page.
4695 */
4696static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4697{
4698 /*
4699 * Map the shadow page and take action according to the page kind.
4700 */
4701 PVMCC pVM = pPool->CTX_SUFF(pVM);
4702 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4703 switch (pPage->enmKind)
4704 {
4705 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4706 {
4707 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4708 void *pvGst;
4709 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4710 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4711 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4712 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4713 break;
4714 }
4715
4716 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4717 {
4718 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4719 void *pvGst;
4720 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4721 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4722 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4723 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4724 break;
4725 }
4726
4727 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4728 {
4729 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4730 void *pvGst;
4731 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4732 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4733 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4734 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4735 break;
4736 }
4737
4738 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4739 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4740 {
4741 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4742 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4743 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4744 break;
4745 }
4746
4747 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4748 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4749 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4750 {
4751 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4752 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4753 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4754 break;
4755 }
4756
4757 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4758 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4759 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4760 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4761 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4762 case PGMPOOLKIND_PAE_PD_PHYS:
4763 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4764 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4765 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4766 break;
4767
4768 case PGMPOOLKIND_32BIT_PD_PHYS:
4769 case PGMPOOLKIND_32BIT_PD:
4770 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4771 break;
4772
4773 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4774 case PGMPOOLKIND_PAE_PDPT:
4775 case PGMPOOLKIND_PAE_PDPT_PHYS:
4776 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4777 break;
4778
4779 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4780 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4781 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4782 break;
4783
4784 case PGMPOOLKIND_64BIT_PML4:
4785 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4786 break;
4787
4788 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4789 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4790 break;
4791
4792 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4793 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4794 break;
4795
4796 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4797 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4798 break;
4799
4800 default:
4801 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4802 }
4803
4804    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4805 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4806 ASMMemZeroPage(pvShw);
4807 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4808 pPage->fZeroed = true;
4809 Assert(!pPage->cPresent);
4810 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4811}
4812
4813
4814/**
4815 * Flushes a pool page.
4816 *
4817 * This moves the page to the free list after removing all user references to it.
4818 *
4819 * @returns VBox status code.
4820 * @retval VINF_SUCCESS on success.
4821 * @param pPool The pool.
4822 * @param pPage The shadow page.
4823 * @param   fFlush      Flush the TLBs when required (should only be false in very specific use cases!!)
4824 */
4825int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4826{
4827 PVMCC pVM = pPool->CTX_SUFF(pVM);
4828 bool fFlushRequired = false;
4829
4830 int rc = VINF_SUCCESS;
4831 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4832 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4833 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4834
4835 /*
4836 * Reject any attempts at flushing any of the special root pages (shall
4837 * not happen).
4838 */
4839 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4840 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4841 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4842 VINF_SUCCESS);
4843
4844 pgmLock(pVM);
4845
4846 /*
4847     * Quietly reject any attempts at flushing the currently active shadow CR3 mapping.
4848 */
4849 if (pgmPoolIsPageLocked(pPage))
4850 {
4851 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4852 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4853 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4854 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4855 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4856 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4857 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4858 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4859 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4860 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4861 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4862 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4863 pgmUnlock(pVM);
4864 return VINF_SUCCESS;
4865 }
4866
4867#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4868 /* Start a subset so we won't run out of mapping space. */
4869 PVMCPU pVCpu = VMMGetCpu(pVM);
4870 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4871#endif
4872
4873 /*
4874 * Mark the page as being in need of an ASMMemZeroPage().
4875 */
4876 pPage->fZeroed = false;
4877
4878#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4879 if (pPage->fDirty)
4880 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4881#endif
4882
4883 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4884 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4885 fFlushRequired = true;
4886
4887 /*
4888 * Clear the page.
4889 */
4890 pgmPoolTrackClearPageUsers(pPool, pPage);
4891 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4892 pgmPoolTrackDeref(pPool, pPage);
4893 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4894
4895 /*
4896 * Flush it from the cache.
4897 */
4898 pgmPoolCacheFlushPage(pPool, pPage);
4899
4900#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4901 /* Heavy stuff done. */
4902 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4903#endif
4904
4905 /*
4906     * Deregister the monitoring.
4907 */
4908 if (pPage->fMonitored)
4909 rc = pgmPoolMonitorFlush(pPool, pPage);
4910
4911 /*
4912 * Free the page.
4913 */
4914 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4915 pPage->iNext = pPool->iFreeHead;
4916 pPool->iFreeHead = pPage->idx;
4917 pPage->enmKind = PGMPOOLKIND_FREE;
4918 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4919 pPage->GCPhys = NIL_RTGCPHYS;
4920 pPage->fReusedFlushPending = false;
4921
4922 pPool->cUsedPages--;
4923
4924 /* Flush the TLBs of all VCPUs if required. */
4925 if ( fFlushRequired
4926 && fFlush)
4927 {
4928 PGM_INVL_ALL_VCPU_TLBS(pVM);
4929 }
4930
4931 pgmUnlock(pVM);
4932 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4933 return rc;
4934}
4935
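/*
 * A minimal, hedged usage sketch for pgmPoolFlushPage(). The variable pPageStale
 * is hypothetical and stands for a pool page the caller knows to be unlocked and
 * no longer needed; passing fFlush=true lets the pool invalidate all VCPU TLBs if
 * the page still had users.
 *
 * @code
 *     if (!pPageStale->fCached)
 *     {
 *         int rc = pgmPoolFlushPage(pPool, pPageStale, true);   // fFlush=true
 *         AssertRC(rc);
 *     }
 * @endcode
 */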
4936
4937/**
4938 * Frees a usage of a pool page.
4939 *
4940 * The caller is responsible for updating the user table so that it no longer
4941 * references the shadow page.
4942 *
4943 * @param pPool The pool.
4944 * @param pPage The shadow page.
4945 * @param iUser The shadow page pool index of the user table.
4946 * NIL_PGMPOOL_IDX for root pages.
4947 * @param iUserTable The index into the user table (shadowed). Ignored if
4948 * root page.
4949 */
4950void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4951{
4952 PVMCC pVM = pPool->CTX_SUFF(pVM);
4953
4954 STAM_PROFILE_START(&pPool->StatFree, a);
4955 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4956 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4957 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
4958
4959 pgmLock(pVM);
4960 if (iUser != NIL_PGMPOOL_IDX)
4961 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4962 if (!pPage->fCached)
4963 pgmPoolFlushPage(pPool, pPage);
4964 pgmUnlock(pVM);
4965 STAM_PROFILE_STOP(&pPool->StatFree, a);
4966}
4967
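/*
 * Hedged caller sketch for pgmPoolFreeByPage(). The names pShwPd, pShwPdPage,
 * pShwPTPage and iPdEntry are hypothetical: the mapped shadow page directory,
 * the pool page backing that directory (its idx is the iUser), the pool page of
 * the shadow page table being dropped, and the directory entry index. Clearing
 * the referencing entry itself stays the caller's responsibility.
 *
 * @code
 *     pShwPd->a[iPdEntry].u = 0;                                        // drop the reference first
 *     pgmPoolFreeByPage(pPool, pShwPTPage, pShwPdPage->idx, iPdEntry);  // then release the usage
 * @endcode
 */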
4968
4969/**
4970 * Makes one or more pages free, growing the pool or evicting a cached page as needed.
4971 *
4972 * @returns VBox status code.
4973 * @retval VINF_SUCCESS on success.
4974 *
4975 * @param pPool The pool.
4976 * @param enmKind Page table kind
4977 * @param iUser The user of the page.
4978 */
4979static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4980{
4981 PVMCC pVM = pPool->CTX_SUFF(pVM);
4982 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
4983 NOREF(enmKind);
4984
4985 /*
4986     * If the pool isn't fully grown yet, expand it.
4987 */
4988    const char *pszTmp = "pgmPoolMakeMoreFreePages/no-growth";
4989 if (pPool->cCurPages < pPool->cMaxPages)
4990 {
4991 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4992#ifdef IN_RING3
4993 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
4994#else
4995 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4996#endif
4997 if (rc == VINF_SUCCESS)
4998 { /* likely */ }
4999 else
5000 {
5001 if (RT_FAILURE(rc))
5002 return rc;
5003 }
5004
5005 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5006 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5007 return VINF_SUCCESS;
5008        pszTmp = "pgmPoolMakeMoreFreePages/grew-it";
5009 }
5010
5011 /*
5012 * Free one cached page.
5013 */
5014 return pgmPoolCacheFreeOne(pPool, iUser, pszTmp);
5015}
5016
5017
5018/**
5019 * Allocates a page from the pool.
5020 *
5021 * This page may actually be a cached page and not in need of any processing
5022 * on the caller's part.
5023 *
5024 * @returns VBox status code.
5025 * @retval VINF_SUCCESS if a NEW page was allocated.
5026 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5027 *
5028 * @param pVM The cross context VM structure.
5029 * @param   GCPhys      The GC physical address of the page we're going to shadow.
5030 * For 4MB and 2MB PD entries, it's the first address the
5031 * shadow PT is covering.
5032 * @param enmKind The kind of mapping.
5033 * @param enmAccess Access type for the mapping (only relevant for big pages)
5034 * @param fA20Enabled Whether the A20 gate is enabled or not.
5035 * @param iUser The shadow page pool index of the user table. Root
5036 * pages should pass NIL_PGMPOOL_IDX.
5037 * @param iUserTable The index into the user table (shadowed). Ignored for
5038 * root pages (iUser == NIL_PGMPOOL_IDX).
5039 * @param fLockPage Lock the page
5040 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5041 */
5042int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5043 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5044{
5045 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5046 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5047 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5048 *ppPage = NULL;
5049 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5050     * (TRPMR3SyncIDT) because of FF priority. Try to fix that?
5051 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5052
5053 pgmLock(pVM);
5054
5055 if (pPool->fCacheEnabled)
5056 {
5057 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5058 if (RT_SUCCESS(rc2))
5059 {
5060 if (fLockPage)
5061 pgmPoolLockPage(pPool, *ppPage);
5062 pgmUnlock(pVM);
5063 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5064 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5065 return rc2;
5066 }
5067 }
5068
5069 /*
5070 * Allocate a new one.
5071 */
5072 int rc = VINF_SUCCESS;
5073 uint16_t iNew = pPool->iFreeHead;
5074 if (iNew == NIL_PGMPOOL_IDX)
5075 {
5076 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5077 if (RT_FAILURE(rc))
5078 {
5079 pgmUnlock(pVM);
5080 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5081 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5082 return rc;
5083 }
5084 iNew = pPool->iFreeHead;
5085 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5086 }
5087
5088 /* unlink the free head */
5089 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5090 pPool->iFreeHead = pPage->iNext;
5091 pPage->iNext = NIL_PGMPOOL_IDX;
5092
5093 /*
5094 * Initialize it.
5095 */
5096 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5097 pPage->enmKind = enmKind;
5098 pPage->enmAccess = enmAccess;
5099 pPage->GCPhys = GCPhys;
5100 pPage->fA20Enabled = fA20Enabled;
5101 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5102 pPage->fMonitored = false;
5103 pPage->fCached = false;
5104 pPage->fDirty = false;
5105 pPage->fReusedFlushPending = false;
5106 pPage->cModifications = 0;
5107 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5108 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5109 pPage->cPresent = 0;
5110 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5111 pPage->idxDirtyEntry = 0;
5112 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5113 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5114 pPage->cLastAccessHandler = 0;
5115 pPage->cLocked = 0;
5116# ifdef VBOX_STRICT
5117 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5118# endif
5119
5120 /*
5121 * Insert into the tracking and cache. If this fails, free the page.
5122 */
5123 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5124 if (RT_FAILURE(rc3))
5125 {
5126 pPool->cUsedPages--;
5127 pPage->enmKind = PGMPOOLKIND_FREE;
5128 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5129 pPage->GCPhys = NIL_RTGCPHYS;
5130 pPage->iNext = pPool->iFreeHead;
5131 pPool->iFreeHead = pPage->idx;
5132 pgmUnlock(pVM);
5133 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5134 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5135 return rc3;
5136 }
5137
5138 /*
5139 * Commit the allocation, clear the page and return.
5140 */
5141#ifdef VBOX_WITH_STATISTICS
5142 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5143 pPool->cUsedPagesHigh = pPool->cUsedPages;
5144#endif
5145
5146 if (!pPage->fZeroed)
5147 {
5148 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5149 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5150 ASMMemZeroPage(pv);
5151 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5152 }
5153
5154 *ppPage = pPage;
5155 if (fLockPage)
5156 pgmPoolLockPage(pPool, pPage);
5157 pgmUnlock(pVM);
5158 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5159 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5160 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5161 return rc;
5162}
5163
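/*
 * A hedged allocation sketch. GCPhysGstPT, pShwPdPage and iPdEntry are hypothetical
 * caller-side values: the guest page table's physical address, the pool page of the
 * shadow page directory that will reference the new table (its idx is the iUser),
 * and the directory entry index used as iUserTable.
 *
 * @code
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysGstPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
 *                           true,              // fA20Enabled
 *                           pShwPdPage->idx,   // iUser
 *                           iPdEntry,          // iUserTable
 *                           false,             // fLockPage
 *                           &pShwPage);
 *     if (RT_SUCCESS(rc))                      // VINF_SUCCESS or VINF_PGM_CACHED_PAGE
 *     {
 *         void *pvShwPT = PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
 *         // point the shadow PDE at pShwPage->Core.Key and fill/sync pvShwPT as needed
 *     }
 * @endcode
 */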
5164
5165/**
5166 * Frees a usage of a pool page.
5167 *
5168 * @param pVM The cross context VM structure.
5169 * @param HCPhys The HC physical address of the shadow page.
5170 * @param iUser The shadow page pool index of the user table.
5171 * NIL_PGMPOOL_IDX if root page.
5172 * @param iUserTable The index into the user table (shadowed). Ignored if
5173 * root page.
5174 */
5175void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5176{
5177 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5178 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5179 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5180}
5181
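/*
 * Hedged tear-down counterpart to the allocation sketch above, freeing by host
 * physical address instead of by page pointer. ShwPde (a shadow PAE PDE value),
 * pShwPdPage and iPdEntry are hypothetical caller-side variables.
 *
 * @code
 *     if (ShwPde.u & X86_PDE_P)
 *         pgmPoolFree(pVM, ShwPde.u & X86_PDE_PAE_PG_MASK, pShwPdPage->idx, iPdEntry);
 * @endcode
 */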
5182
5183/**
5184 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5185 *
5186 * @returns Pointer to the shadow page structure.
5187 * @param pPool The pool.
5188 * @param HCPhys The HC physical address of the shadow page.
5189 */
5190PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5191{
5192 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5193
5194 /*
5195 * Look up the page.
5196 */
5197 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5198
5199 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5200 return pPage;
5201}
5202
5203
5204/**
5205 * Internal worker for finding a page for debugging purposes, no assertions.
5206 *
5207 * @returns Pointer to the shadow page structure. NULL if not found.
5208 * @param pPool The pool.
5209 * @param HCPhys The HC physical address of the shadow page.
5210 */
5211PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5212{
5213 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5214 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5215}
5216
5217
5218/**
5219 * Internal worker for PGM_HCPHYS_2_PTR.
5220 *
5221 * @returns VBox status code.
5222 * @param pVM The cross context VM structure.
5223 * @param HCPhys The HC physical address of the shadow page.
5224 * @param ppv Where to return the address.
5225 */
5226int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5227{
5228 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5229 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5230 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5231 VERR_PGM_POOL_GET_PAGE_FAILED);
5232 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5233 return VINF_SUCCESS;
5234}
5235
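/*
 * Worked example for the lookup above, using purely illustrative numbers: for
 * HCPhys = 0x0000000123456ac0 the AVL tree is queried with the page-aligned key
 * 0x0000000123456000 (HCPhys & X86_PTE_PAE_PG_MASK); if a live pool page is found,
 * *ppv ends up as pPage->CTX_SUFF(pvPage) + 0xac0 (HCPhys & PAGE_OFFSET_MASK).
 */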
5236#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5237
5238/**
5239 * Flushes the specified page if present.
5240 *
5241 * @param pVM The cross context VM structure.
5242 * @param GCPhys Guest physical address of the page to flush
5243 */
5244void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5245{
5246 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5247
5248 VM_ASSERT_EMT(pVM);
5249
5250 /*
5251 * Look up the GCPhys in the hash.
5252 */
5253 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5254 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5255 if (i == NIL_PGMPOOL_IDX)
5256 return;
5257
5258 do
5259 {
5260 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5261 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5262 {
5263 switch (pPage->enmKind)
5264 {
5265 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5266 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5267 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5268 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5269 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5270 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5271 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5272 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5273 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5274 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5275 case PGMPOOLKIND_64BIT_PML4:
5276 case PGMPOOLKIND_32BIT_PD:
5277 case PGMPOOLKIND_PAE_PDPT:
5278 {
5279 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5280# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5281 if (pPage->fDirty)
5282 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5283 else
5284# endif
5285 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5286 Assert(!pgmPoolIsPageLocked(pPage));
5287 pgmPoolMonitorChainFlush(pPool, pPage);
5288 return;
5289 }
5290
5291 /* ignore, no monitoring. */
5292 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5293 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5294 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5295 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5296 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5297 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5298 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5299 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5300 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5301 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5302 case PGMPOOLKIND_ROOT_NESTED:
5303 case PGMPOOLKIND_PAE_PD_PHYS:
5304 case PGMPOOLKIND_PAE_PDPT_PHYS:
5305 case PGMPOOLKIND_32BIT_PD_PHYS:
5306 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5307 break;
5308
5309 default:
5310 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5311 }
5312 }
5313
5314 /* next */
5315 i = pPage->iNext;
5316 } while (i != NIL_PGMPOOL_IDX);
5317 return;
5318}
5319
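/*
 * Hedged usage sketch (ring-3 only): GCPhysGstPT is a hypothetical guest page-table
 * address whose shadow copies should be discarded. Any offset within the page may be
 * passed, since the lookup above masks the address down to the page boundary first.
 *
 * @code
 *     pgmPoolFlushPageByGCPhys(pVM, GCPhysGstPT);
 * @endcode
 */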
5320
5321/**
5322 * Resets the shadow paging state of a CPU on hot plugging.
5323 *
5324 * @param pVM The cross context VM structure.
5325 * @param pVCpu The cross context virtual CPU structure.
5326 */
5327void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5328{
5329 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5330
5331 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5332 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5333 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5334}
5335
5336
5337/**
5338 * Flushes the entire cache.
5339 *
5340 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5341 * this and will execute the CR3 flush.
5342 *
5343 * @param pVM The cross context VM structure.
5344 */
5345void pgmR3PoolReset(PVM pVM)
5346{
5347 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5348
5349 PGM_LOCK_ASSERT_OWNER(pVM);
5350 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5351 LogFlow(("pgmR3PoolReset:\n"));
5352
5353 /*
5354 * If there are no pages in the pool, there is nothing to do.
5355 */
5356 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5357 {
5358 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5359 return;
5360 }
5361
5362 /*
5363 * Exit the shadow mode since we're going to clear everything,
5364 * including the root page.
5365 */
5366 VMCC_FOR_EACH_VMCPU(pVM)
5367 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5368 VMCC_FOR_EACH_VMCPU_END(pVM);
5369
5370
5371 /*
5372 * Nuke the free list and reinsert all pages into it.
5373 */
5374 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5375 {
5376 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5377
5378 if (pPage->fMonitored)
5379 pgmPoolMonitorFlush(pPool, pPage);
5380 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5381 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5382 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5383 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5384 pPage->GCPhys = NIL_RTGCPHYS;
5385 pPage->enmKind = PGMPOOLKIND_FREE;
5386 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5387 Assert(pPage->idx == i);
5388 pPage->iNext = i + 1;
5389 pPage->fA20Enabled = true;
5390 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5391 pPage->fSeenNonGlobal = false;
5392 pPage->fMonitored = false;
5393 pPage->fDirty = false;
5394 pPage->fCached = false;
5395 pPage->fReusedFlushPending = false;
5396 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5397 pPage->cPresent = 0;
5398 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5399 pPage->cModifications = 0;
5400 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5401 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5402 pPage->idxDirtyEntry = 0;
5403 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5404 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5405 pPage->cLastAccessHandler = 0;
5406 pPage->cLocked = 0;
5407# ifdef VBOX_STRICT
5408 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5409# endif
5410 }
5411 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5412 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5413 pPool->cUsedPages = 0;
5414
5415 /*
5416 * Zap and reinitialize the user records.
5417 */
5418 pPool->cPresent = 0;
5419 pPool->iUserFreeHead = 0;
5420 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5421 const unsigned cMaxUsers = pPool->cMaxUsers;
5422 for (unsigned i = 0; i < cMaxUsers; i++)
5423 {
5424 paUsers[i].iNext = i + 1;
5425 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5426 paUsers[i].iUserTable = 0xfffffffe;
5427 }
5428 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5429
5430 /*
5431 * Clear all the GCPhys links and rebuild the phys ext free list.
5432 */
5433 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5434 pRam;
5435 pRam = pRam->CTX_SUFF(pNext))
5436 {
5437 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5438 while (iPage-- > 0)
5439 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5440 }
5441
5442 pPool->iPhysExtFreeHead = 0;
5443 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5444 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5445 for (unsigned i = 0; i < cMaxPhysExts; i++)
5446 {
5447 paPhysExts[i].iNext = i + 1;
5448 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5449 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5450 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5451 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5452 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5453 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5454 }
5455 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5456
5457 /*
5458 * Just zap the modified list.
5459 */
5460 pPool->cModifiedPages = 0;
5461 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5462
5463 /*
5464 * Clear the GCPhys hash and the age list.
5465 */
5466 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5467 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5468 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5469 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5470
5471# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5472 /* Clear all dirty pages. */
5473 pPool->idxFreeDirtyPage = 0;
5474 pPool->cDirtyPages = 0;
5475 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5476 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5477# endif
5478
5479 /*
5480 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5481 */
5482 VMCC_FOR_EACH_VMCPU(pVM)
5483 {
5484 /*
5485 * Re-enter the shadowing mode and assert Sync CR3 FF.
5486 */
5487 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5488 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5489 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5490 }
5491 VMCC_FOR_EACH_VMCPU_END(pVM);
5492
5493 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5494}
5495
5496#endif /* IN_RING3 */
5497
5498#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5499/**
5500 * Stringifies a PGMPOOLKIND value.
5501 */
5502static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5503{
5504 switch ((PGMPOOLKIND)enmKind)
5505 {
5506 case PGMPOOLKIND_INVALID:
5507 return "PGMPOOLKIND_INVALID";
5508 case PGMPOOLKIND_FREE:
5509 return "PGMPOOLKIND_FREE";
5510 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5511 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5512 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5513 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5514 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5515 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5516 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5517 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5518 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5519 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5520 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5521 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5522 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5523 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5524 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5525 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5526 case PGMPOOLKIND_32BIT_PD:
5527 return "PGMPOOLKIND_32BIT_PD";
5528 case PGMPOOLKIND_32BIT_PD_PHYS:
5529 return "PGMPOOLKIND_32BIT_PD_PHYS";
5530 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5531 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5532 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5533 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5534 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5535 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5536 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5537 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5538 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5539 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5540 case PGMPOOLKIND_PAE_PD_PHYS:
5541 return "PGMPOOLKIND_PAE_PD_PHYS";
5542 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5543 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5544 case PGMPOOLKIND_PAE_PDPT:
5545 return "PGMPOOLKIND_PAE_PDPT";
5546 case PGMPOOLKIND_PAE_PDPT_PHYS:
5547 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5548 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5549 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5550 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5551 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5552 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5553 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5554 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5555 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5556 case PGMPOOLKIND_64BIT_PML4:
5557 return "PGMPOOLKIND_64BIT_PML4";
5558 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5559 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5560 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5561 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5562 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5563 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5564 case PGMPOOLKIND_ROOT_NESTED:
5565 return "PGMPOOLKIND_ROOT_NESTED";
5566 }
5567 return "Unknown kind!";
5568}
5569#endif /* LOG_ENABLED || VBOX_STRICT */
5570