VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 32544

Last change on this file since 32544 was 32544, checked in by vboxsync, 15 years ago

Disabled pae/32-bit optimization due to XP installation regressions

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 208.5 KB
1/* $Id: PGMAllPool.cpp 32544 2010-09-16 10:00:23Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/pgm.h>
24#include <VBox/mm.h>
25#include <VBox/em.h>
26#include <VBox/cpum.h>
27#ifdef IN_RC
28# include <VBox/patm.h>
29#endif
30#include "../PGMInternal.h"
31#include <VBox/vm.h>
32#include "../PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/hwacc_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
49DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#ifndef IN_RING3
54DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
55#endif
56#ifdef LOG_ENABLED
57static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
58#endif
59#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
60static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
61#endif
62
63int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
64PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
65void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
66void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
67
68RT_C_DECLS_END
69
70
71/**
72 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
73 *
74 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
75 * @param enmKind The page kind.
76 */
77DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
78{
79 switch (enmKind)
80 {
81 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
83 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
84 return true;
85 default:
86 return false;
87 }
88}
89
90
91/**
92 * Flushes a chain of pages sharing the same access monitor.
93 *
94 * @returns VBox status code suitable for scheduling.
95 * @param pPool The pool.
96 * @param pPage A page in the chain.
97 * @todo VBOXSTRICTRC
98 */
99int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
100{
101 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
102
103 /*
104 * Find the list head.
105 */
106 uint16_t idx = pPage->idx;
107 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
108 {
109 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
110 {
111 idx = pPage->iMonitoredPrev;
112 Assert(idx != pPage->idx);
113 pPage = &pPool->aPages[idx];
114 }
115 }
116
117 /*
118 * Iterate the list flushing each shadow page.
119 */
120 int rc = VINF_SUCCESS;
121 for (;;)
122 {
123 idx = pPage->iMonitoredNext;
124 Assert(idx != pPage->idx);
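 /* Only regular pool pages get flushed here; indexes below PGMPOOL_IDX_FIRST belong to the fixed special root pages. */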
125 if (pPage->idx >= PGMPOOL_IDX_FIRST)
126 {
127 int rc2 = pgmPoolFlushPage(pPool, pPage);
128 AssertRC(rc2);
129 }
130 /* next */
131 if (idx == NIL_PGMPOOL_IDX)
132 break;
133 pPage = &pPool->aPages[idx];
134 }
135 return rc;
136}
137
138
139/**
140 * Wrapper for getting the current context pointer to the entry being modified.
141 *
142 * @returns VBox status code suitable for scheduling.
143 * @param pVM VM Handle.
144 * @param pvDst Destination address
145 * @param pvSrc Source guest virtual address.
146 * @param GCPhysSrc The source guest physical address.
147 * @param cb Size of data to read
148 */
149DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
150{
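 /* Note: the source address is aligned down to the entry size (cb) so the whole guest entry is read. */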
151#if defined(IN_RING3)
152 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
153 return VINF_SUCCESS;
154#else
155 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
156 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
157#endif
158}
159
160/**
161 * Process shadow entries before they are changed by the guest.
162 *
163 * For PT entries we will clear them. For PD entries, we'll simply check
164 * for mapping conflicts and set the SyncCR3 FF if found.
165 *
166 * @param pVCpu VMCPU handle
167 * @param pPool The pool.
168 * @param pPage The head page.
169 * @param GCPhysFault The guest physical fault address.
170 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
171 * In R3 this is the host context 'fault' address.
172 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
173 */
174void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
175{
176 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
177 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
178 PVM pVM = pPool->CTX_SUFF(pVM);
179
180 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
181
182 for (;;)
183 {
184 union
185 {
186 void *pv;
187 PX86PT pPT;
188 PPGMSHWPTPAE pPTPae;
189 PX86PD pPD;
190 PX86PDPAE pPDPae;
191 PX86PDPT pPDPT;
192 PX86PML4 pPML4;
193 } uShw;
194
195 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
196
197 uShw.pv = NULL;
198 switch (pPage->enmKind)
199 {
200 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
201 {
202 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
203 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
204 const unsigned iShw = off / sizeof(X86PTE);
205 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
206 if (uShw.pPT->a[iShw].n.u1Present)
207 {
208 X86PTE GstPte;
209
210 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
211 AssertRC(rc);
212 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
213 pgmPoolTracDerefGCPhysHint(pPool, pPage,
214 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
215 GstPte.u & X86_PTE_PG_MASK,
216 iShw);
217 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
218 }
219 break;
220 }
221
222 /* page/2 sized */
223 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
224 {
225 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
226 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
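 /* A PAE page table shadows only half (2 KB) of a 32-bit guest page table, so ignore writes to the half this page doesn't cover. */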
227 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
228 {
229 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
230 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
231 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
232 {
233 X86PTE GstPte;
234 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
235 AssertRC(rc);
236
237 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
238 pgmPoolTracDerefGCPhysHint(pPool, pPage,
239 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
240 GstPte.u & X86_PTE_PG_MASK,
241 iShw);
242 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
243 }
244 }
245 break;
246 }
247
248 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
249 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
250 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
251 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
252 {
253 unsigned iGst = off / sizeof(X86PDE);
254 unsigned iShwPdpt = iGst / 256;
255 unsigned iShw = (iGst % 256) * 2;
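 /* Each 32-bit guest PD is shadowed by four PAE PDs (256 guest PDEs apiece), and each guest PDE maps to two PAE PDEs, hence the factor of two. */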
256 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
257
258 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
259 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
260 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
261 {
262 for (unsigned i = 0; i < 2; i++)
263 {
264# ifndef IN_RING0
265 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
266 {
267 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
268 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
269 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
270 break;
271 }
272 else
273# endif /* !IN_RING0 */
274 if (uShw.pPDPae->a[iShw+i].n.u1Present)
275 {
276 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
277 pgmPoolFree(pVM,
278 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
279 pPage->idx,
280 iShw + i);
281 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
282 }
283
284 /* paranoia / a bit assumptive. */
285 if ( (off & 3)
286 && (off & 3) + cbWrite > 4)
287 {
288 const unsigned iShw2 = iShw + 2 + i;
289 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
290 {
291# ifndef IN_RING0
292 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
293 {
294 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
295 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
296 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
297 break;
298 }
299 else
300# endif /* !IN_RING0 */
301 if (uShw.pPDPae->a[iShw2].n.u1Present)
302 {
303 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
304 pgmPoolFree(pVM,
305 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
306 pPage->idx,
307 iShw2);
308 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
309 }
310 }
311 }
312 }
313 }
314 break;
315 }
316
317 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
318 {
319 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
320 const unsigned iShw = off / sizeof(X86PTEPAE);
321 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
322 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
323 {
324 X86PTEPAE GstPte;
325 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
326 AssertRC(rc);
327
328 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
329 pgmPoolTracDerefGCPhysHint(pPool, pPage,
330 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
331 GstPte.u & X86_PTE_PAE_PG_MASK,
332 iShw);
333 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
334 }
335
336 /* paranoia / a bit assumptive. */
337 if ( (off & 7)
338 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
339 {
340 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
341 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
342
343 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
344 {
345 X86PTEPAE GstPte;
346# ifdef IN_RING3
347 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
348# else
349 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
350# endif
351 AssertRC(rc);
352 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
353 pgmPoolTracDerefGCPhysHint(pPool, pPage,
354 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
355 GstPte.u & X86_PTE_PAE_PG_MASK,
356 iShw2);
357 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
358 }
359 }
360 break;
361 }
362
363 case PGMPOOLKIND_32BIT_PD:
364 {
365 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
366 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
367
368 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
369 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
370# ifndef IN_RING0
371 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
372 {
373 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
374 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
375 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
376 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
377 break;
378 }
379# endif /* !IN_RING0 */
380# ifndef IN_RING0
381 else
382# endif /* !IN_RING0 */
383 {
384 if (uShw.pPD->a[iShw].n.u1Present)
385 {
386 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
387 pgmPoolFree(pVM,
388 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
389 pPage->idx,
390 iShw);
391 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
392 }
393 }
394 /* paranoia / a bit assumptive. */
395 if ( (off & 3)
396 && (off & 3) + cbWrite > sizeof(X86PTE))
397 {
398 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
399 if ( iShw2 != iShw
400 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
401 {
402# ifndef IN_RING0
403 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
404 {
405 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
406 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
407 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
408 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
409 break;
410 }
411# endif /* !IN_RING0 */
412# ifndef IN_RING0
413 else
414# endif /* !IN_RING0 */
415 {
416 if (uShw.pPD->a[iShw2].n.u1Present)
417 {
418 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
419 pgmPoolFree(pVM,
420 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
421 pPage->idx,
422 iShw2);
423 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
424 }
425 }
426 }
427 }
428#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
429 if ( uShw.pPD->a[iShw].n.u1Present
430 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
431 {
432 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
433# ifdef IN_RC /* TLB load - we're pushing things a bit... */
434 ASMProbeReadByte(pvAddress);
435# endif
436 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
437 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
438 }
439#endif
440 break;
441 }
442
443 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
444 {
445 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
446 const unsigned iShw = off / sizeof(X86PDEPAE);
447 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
448#ifndef IN_RING0
449 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
450 {
451 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
452 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
453 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
454 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
455 break;
456 }
457#endif /* !IN_RING0 */
458 /*
459 * Causes trouble when the guest uses a PDE to refer to the whole page table level
460 * structure. (Invalidate here; faults later on when it tries to change the page
461 * table entries -> recheck; probably only applies to the RC case.)
462 */
463# ifndef IN_RING0
464 else
465# endif /* !IN_RING0 */
466 {
467 if (uShw.pPDPae->a[iShw].n.u1Present)
468 {
469 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
470 pgmPoolFree(pVM,
471 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
472 pPage->idx,
473 iShw);
474 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
475 }
476 }
477 /* paranoia / a bit assumptive. */
478 if ( (off & 7)
479 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
480 {
481 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
482 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
483
484#ifndef IN_RING0
485 if ( iShw2 != iShw
486 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
487 {
488 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
489 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
490 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
491 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
492 break;
493 }
494#endif /* !IN_RING0 */
495# ifndef IN_RING0
496 else
497# endif /* !IN_RING0 */
498 if (uShw.pPDPae->a[iShw2].n.u1Present)
499 {
500 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
501 pgmPoolFree(pVM,
502 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
503 pPage->idx,
504 iShw2);
505 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
506 }
507 }
508 break;
509 }
510
511 case PGMPOOLKIND_PAE_PDPT:
512 {
513 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
514 /*
515 * Hopefully this doesn't happen very often:
516 * - touching unused parts of the page
517 * - messing with the bits of pd pointers without changing the physical address
518 */
519 /* PDPT roots are not page aligned; 32 byte only! */
520 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
521
522 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
523 const unsigned iShw = offPdpt / sizeof(X86PDPE);
524 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
525 {
526# ifndef IN_RING0
527 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
528 {
529 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
530 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
531 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
532 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
533 break;
534 }
535# endif /* !IN_RING0 */
536# ifndef IN_RING0
537 else
538# endif /* !IN_RING0 */
539 if (uShw.pPDPT->a[iShw].n.u1Present)
540 {
541 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
542 pgmPoolFree(pVM,
543 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
544 pPage->idx,
545 iShw);
546 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
547 }
548
549 /* paranoia / a bit assumptive. */
550 if ( (offPdpt & 7)
551 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
552 {
553 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
554 if ( iShw2 != iShw
555 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
556 {
557# ifndef IN_RING0
558 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
559 {
560 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
561 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
562 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
563 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
564 break;
565 }
566# endif /* !IN_RING0 */
567# ifndef IN_RING0
568 else
569# endif /* !IN_RING0 */
570 if (uShw.pPDPT->a[iShw2].n.u1Present)
571 {
572 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
573 pgmPoolFree(pVM,
574 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
575 pPage->idx,
576 iShw2);
577 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
578 }
579 }
580 }
581 }
582 break;
583 }
584
585#ifndef IN_RC
586 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
587 {
588 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
589 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
590 const unsigned iShw = off / sizeof(X86PDEPAE);
591 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
592 if (uShw.pPDPae->a[iShw].n.u1Present)
593 {
594 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
595 pgmPoolFree(pVM,
596 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
597 pPage->idx,
598 iShw);
599 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
600 }
601 /* paranoia / a bit assumptive. */
602 if ( (off & 7)
603 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
604 {
605 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
606 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
607
608 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
609 if (uShw.pPDPae->a[iShw2].n.u1Present)
610 {
611 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
612 pgmPoolFree(pVM,
613 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
614 pPage->idx,
615 iShw2);
616 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
617 }
618 }
619 break;
620 }
621
622 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
623 {
624 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
625 /*
626 * Hopefully this doesn't happen very often:
627 * - messing with the bits of pd pointers without changing the physical address
628 */
629 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
630 const unsigned iShw = off / sizeof(X86PDPE);
631 if (uShw.pPDPT->a[iShw].n.u1Present)
632 {
633 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
634 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
635 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
636 }
637 /* paranoia / a bit assumptive. */
638 if ( (off & 7)
639 && (off & 7) + cbWrite > sizeof(X86PDPE))
640 {
641 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
642 if (uShw.pPDPT->a[iShw2].n.u1Present)
643 {
644 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
645 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
646 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
647 }
648 }
649 break;
650 }
651
652 case PGMPOOLKIND_64BIT_PML4:
653 {
654 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
655 /*
656 * Hopefully this doesn't happen very often:
657 * - messing with the bits of pd pointers without changing the physical address
658 */
659 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
660 const unsigned iShw = off / sizeof(X86PDPE);
661 if (uShw.pPML4->a[iShw].n.u1Present)
662 {
663 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
664 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
665 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
666 }
667 /* paranoia / a bit assumptive. */
668 if ( (off & 7)
669 && (off & 7) + cbWrite > sizeof(X86PDPE))
670 {
671 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
672 if (uShw.pPML4->a[iShw2].n.u1Present)
673 {
674 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
675 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
676 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
677 }
678 }
679 break;
680 }
681#endif /* !IN_RC */
682
683 default:
684 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
685 }
686 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
687
688 /* next */
689 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
690 return;
691 pPage = &pPool->aPages[pPage->iMonitoredNext];
692 }
693}
694
695# ifndef IN_RING3
696/**
697 * Checks if an access could be a fork operation in progress.
698 *
699 * Meaning that the guest is setting up the parent process for Copy-On-Write.
700 *
701 * @returns true if it's likely that we're forking, otherwise false.
702 * @param pPool The pool.
703 * @param pDis The disassembled instruction.
704 * @param offFault The access offset.
705 */
706DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
707{
708 /*
709 * i386 linux is using btr to clear X86_PTE_RW.
710 * The functions involved are (2.6.16 source inspection):
711 * clear_bit
712 * ptep_set_wrprotect
713 * copy_one_pte
714 * copy_pte_range
715 * copy_pmd_range
716 * copy_pud_range
717 * copy_page_range
718 * dup_mmap
719 * dup_mm
720 * copy_mm
721 * copy_process
722 * do_fork
723 */
724 if ( pDis->pCurInstr->opcode == OP_BTR
725 && !(offFault & 4)
726 /** @todo Validate that the bit index is X86_PTE_RW. */
727 )
728 {
729 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
730 return true;
731 }
732 return false;
733}
734
735
736/**
737 * Determines whether the page is likely to have been reused.
738 *
739 * @returns true if we consider the page as being reused for a different purpose.
740 * @returns false if we consider it to still be a paging page.
741 * @param pVM VM Handle.
742 * @param pVCpu VMCPU Handle.
743 * @param pRegFrame Trap register frame.
744 * @param pDis The disassembly info for the faulting instruction.
745 * @param pvFault The fault address.
746 *
747 * @remark The REP prefix check is left to the caller because of STOSD/W.
748 */
749DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
750{
751#ifndef IN_RC
752 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
753 if ( HWACCMHasPendingIrq(pVM)
754 && (pRegFrame->rsp - pvFault) < 32)
755 {
756 /* Fault caused by stack writes while trying to inject an interrupt event. */
757 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
758 return true;
759 }
760#else
761 NOREF(pVM); NOREF(pvFault);
762#endif
763
764 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
765
766 /* Non-supervisor mode write means it's used for something else. */
767 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
768 return true;
769
770 switch (pDis->pCurInstr->opcode)
771 {
772 /* call implies the actual push of the return address faulted */
773 case OP_CALL:
774 Log4(("pgmPoolMonitorIsReused: CALL\n"));
775 return true;
776 case OP_PUSH:
777 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
778 return true;
779 case OP_PUSHF:
780 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
781 return true;
782 case OP_PUSHA:
783 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
784 return true;
785 case OP_FXSAVE:
786 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
787 return true;
788 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
789 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
790 return true;
791 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
792 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
793 return true;
794 case OP_MOVSWD:
795 case OP_STOSWD:
796 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
797 && pRegFrame->rcx >= 0x40
798 )
799 {
800 Assert(pDis->mode == CPUMODE_64BIT);
801
802 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
803 return true;
804 }
805 return false;
806 }
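 /* A write addressed via ESP/RSP is almost certainly a stack access, meaning the page is no longer used as a page table. */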
807 if ( ( (pDis->param1.flags & USE_REG_GEN32)
808 || (pDis->param1.flags & USE_REG_GEN64))
809 && (pDis->param1.base.reg_gen == USE_REG_ESP))
810 {
811 Log4(("pgmPoolMonitorIsReused: ESP\n"));
812 return true;
813 }
814
815 return false;
816}
817
818/**
819 * Flushes the page being accessed.
820 *
821 * @returns VBox status code suitable for scheduling.
822 * @param pVM The VM handle.
823 * @param pVCpu The VMCPU handle.
824 * @param pPool The pool.
825 * @param pPage The pool page (head).
826 * @param pDis The disassembly of the write instruction.
827 * @param pRegFrame The trap register frame.
828 * @param GCPhysFault The fault address as guest physical address.
829 * @param pvFault The fault address.
830 * @todo VBOXSTRICTRC
831 */
832static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
833 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
834{
835 /*
836 * First, do the flushing.
837 */
838 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
839
840 /*
841 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
842 * Must do this in raw mode (!); XP boot will fail otherwise.
843 */
844 uint32_t cbWritten;
845 VBOXSTRICTRC rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL, &cbWritten);
846 if (RT_SUCCESS(rc2))
847 {
848 pRegFrame->rip += pDis->opsize;
849 AssertMsg(rc2 == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
850 }
851 else if (rc2 == VERR_EM_INTERPRETER)
852 {
853#ifdef IN_RC
854 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
855 {
856 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
857 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
858 rc = VINF_SUCCESS;
859 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
860 }
861 else
862#endif
863 {
864 rc = VINF_EM_RAW_EMULATE_INSTR;
865 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
866 }
867 }
868 else
869 rc = VBOXSTRICTRC_VAL(rc2);
870
871 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
872 return rc;
873}
874
875/**
876 * Handles the STOSD write accesses.
877 *
878 * @returns VBox status code suitable for scheduling.
879 * @param pVM The VM handle.
880 * @param pPool The pool.
881 * @param pPage The pool page (head).
882 * @param pDis The disassembly of the write instruction.
883 * @param pRegFrame The trap register frame.
884 * @param GCPhysFault The fault address as guest physical address.
885 * @param pvFault The fault address.
886 */
887DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
888 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
889{
890 unsigned uIncrement = pDis->param1.size;
891
892 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
893 Assert(pRegFrame->rcx <= 0x20);
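 /* The caller has already validated that this is a REP STOSD/STOSQ that stays within a single page. */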
894
895#ifdef VBOX_STRICT
896 if (pDis->opmode == CPUMODE_32BIT)
897 Assert(uIncrement == 4);
898 else
899 Assert(uIncrement == 8);
900#endif
901
902 Log3(("pgmPoolAccessHandlerSTOSD\n"));
903
904 /*
905 * Increment the modification counter and insert it into the list
906 * of modified pages the first time.
907 */
908 if (!pPage->cModifications++)
909 pgmPoolMonitorModifiedInsert(pPool, pPage);
910
911 /*
912 * Execute REP STOSD.
913 *
914 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
915 * write situation, meaning that it's safe to write here.
916 */
917 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
918 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
919 while (pRegFrame->rcx)
920 {
921#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
922 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
923 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
924 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
925#else
926 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
927#endif
928#ifdef IN_RC
929 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
930#else
931 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
932#endif
933 pu32 += uIncrement;
934 GCPhysFault += uIncrement;
935 pRegFrame->rdi += uIncrement;
936 pRegFrame->rcx--;
937 }
938 pRegFrame->rip += pDis->opsize;
939
940 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
941 return VINF_SUCCESS;
942}
943
944
945/**
946 * Handles the simple write accesses.
947 *
948 * @returns VBox status code suitable for scheduling.
949 * @param pVM The VM handle.
950 * @param pVCpu The VMCPU handle.
951 * @param pPool The pool.
952 * @param pPage The pool page (head).
953 * @param pDis The disassembly of the write instruction.
954 * @param pRegFrame The trap register frame.
955 * @param GCPhysFault The fault address as guest physical address.
956 * @param pvFault The fault address.
957 * @param pfReused Reused state (out)
958 */
959DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
960 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
961{
962 Log3(("pgmPoolAccessHandlerSimple\n"));
963 /*
964 * Increment the modification counter and insert it into the list
965 * of modified pages the first time.
966 */
967 if (!pPage->cModifications++)
968 pgmPoolMonitorModifiedInsert(pPool, pPage);
969
970 /*
971 * Clear all the pages. ASSUMES that pvFault is readable.
972 */
973#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
974 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
975 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
976 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
977#else
978 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
979#endif
980
981 /*
982 * Interpret the instruction.
983 */
984 uint32_t cb;
985 VBOXSTRICTRC rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL, &cb);
986 if (RT_SUCCESS(rc))
987 {
988 pRegFrame->rip += pDis->opsize;
989 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
990 }
991 else if (rc == VERR_EM_INTERPRETER)
992 {
993 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
994 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
995 rc = VINF_EM_RAW_EMULATE_INSTR;
996 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
997 }
998
999#if 0 /* experimental code */
1000 if (rc == VINF_SUCCESS)
1001 {
1002 switch (pPage->enmKind)
1003 {
1004 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1005 {
1006 X86PTEPAE GstPte;
1007 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1008 AssertRC(rc);
1009
1010 /* Check the new value written by the guest. If present and with a bogus physical address, then
1011 * it's fairly safe to assume the guest is reusing the PT.
1012 */
1013 if (GstPte.n.u1Present)
1014 {
1015 RTHCPHYS HCPhys = -1;
1016 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1017 if (rc != VINF_SUCCESS)
1018 {
1019 *pfReused = true;
1020 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1021 }
1022 }
1023 break;
1024 }
1025 }
1026 }
1027#endif
1028
1029 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", VBOXSTRICTRC_VAL(rc), cb));
1030 return VBOXSTRICTRC_VAL(rc);
1031}
1032
1033/**
1034 * \#PF Handler callback for PT write accesses.
1035 *
1036 * @returns VBox status code (appropriate for GC return).
1037 * @param pVM VM Handle.
1038 * @param uErrorCode CPU Error code.
1039 * @param pRegFrame Trap register frame.
1040 * NULL on DMA and other non CPU access.
1041 * @param pvFault The fault address (cr2).
1042 * @param GCPhysFault The GC physical address corresponding to pvFault.
1043 * @param pvUser User argument.
1044 */
1045DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1046{
1047 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1048 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1049 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1050 PVMCPU pVCpu = VMMGetCpu(pVM);
1051 unsigned cMaxModifications;
1052 bool fForcedFlush = false;
1053
1054 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1055
1056 pgmLock(pVM);
1057 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1058 {
1059 /* Pool page changed while we were waiting for the lock; ignore. */
1060 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1061 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1062 pgmUnlock(pVM);
1063 return VINF_SUCCESS;
1064 }
1065#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1066 if (pPage->fDirty)
1067 {
1068 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1069 pgmUnlock(pVM);
1070 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1071 }
1072#endif
1073
1074#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1075 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1076 {
1077 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1078 void *pvGst;
1079 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1080 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1081 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1082 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1083 }
1084#endif
1085
1086 /*
1087 * Disassemble the faulting instruction.
1088 */
1089 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1090 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1091 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1092 {
1093 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1094 pgmUnlock(pVM);
1095 return rc;
1096 }
1097
1098 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1099
1100 /*
1101 * We should ALWAYS have the list head as user parameter. This
1102 * is because we use that page to record the changes.
1103 */
1104 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1105
1106#ifdef IN_RING0
1107 /* Maximum nr of modifications depends on the page type. */
1108 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1109 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1110 cMaxModifications = 4;
1111 else
1112 cMaxModifications = 24;
1113#else
1114 cMaxModifications = 48;
1115#endif
1116
1117 /*
1118 * Incremental page table updates should weigh more than random ones.
1119 * (Only applies when started from offset 0)
1120 */
1121 pVCpu->pgm.s.cPoolAccessHandler++;
1122 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1123 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1124 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1125 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1126 {
1127 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1128 Assert(pPage->cModifications < 32000);
1129 pPage->cModifications = pPage->cModifications * 2;
1130 pPage->pvLastAccessHandlerFault = pvFault;
1131 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1132 if (pPage->cModifications >= cMaxModifications)
1133 {
1134 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1135 fForcedFlush = true;
1136 }
1137 }
1138
1139 if (pPage->cModifications >= cMaxModifications)
1140 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1141
1142 /*
1143 * Check if it's worth dealing with.
1144 */
1145 bool fReused = false;
1146 bool fNotReusedNotForking = false;
1147 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1148 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1149 )
1150 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1151 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1152 {
1153 /*
1154 * Simple instructions, no REP prefix.
1155 */
1156 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1157 {
1158 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1159 if (fReused)
1160 goto flushPage;
1161
1162 /* A mov instruction to change the first page table entry will be remembered so we can detect
1163 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1164 */
1165 if ( rc == VINF_SUCCESS
1166 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1167 && pDis->pCurInstr->opcode == OP_MOV
1168 && (pvFault & PAGE_OFFSET_MASK) == 0)
1169 {
1170 pPage->pvLastAccessHandlerFault = pvFault;
1171 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1172 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1173 /* Make sure we don't kick out a page too quickly. */
1174 if (pPage->cModifications > 8)
1175 pPage->cModifications = 2;
1176 }
1177 else
1178 if (pPage->pvLastAccessHandlerFault == pvFault)
1179 {
1180 /* ignore the 2nd write to this page table entry. */
1181 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1182 }
1183 else
1184 {
1185 pPage->pvLastAccessHandlerFault = 0;
1186 pPage->pvLastAccessHandlerRip = 0;
1187 }
1188
1189 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1190 pgmUnlock(pVM);
1191 return rc;
1192 }
1193
1194 /*
1195 * Windows is frequently doing small memset() operations (netio test 4k+).
1196 * We have to deal with these or we'll kill the cache and performance.
1197 */
1198 if ( pDis->pCurInstr->opcode == OP_STOSWD
1199 && !pRegFrame->eflags.Bits.u1DF
1200 && pDis->opmode == pDis->mode
1201 && pDis->addrmode == pDis->mode)
1202 {
1203 bool fValidStosd = false;
1204
1205 if ( pDis->mode == CPUMODE_32BIT
1206 && pDis->prefix == PREFIX_REP
1207 && pRegFrame->ecx <= 0x20
1208 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1209 && !((uintptr_t)pvFault & 3)
1210 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1211 )
1212 {
1213 fValidStosd = true;
1214 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1215 }
1216 else
1217 if ( pDis->mode == CPUMODE_64BIT
1218 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1219 && pRegFrame->rcx <= 0x20
1220 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1221 && !((uintptr_t)pvFault & 7)
1222 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1223 )
1224 {
1225 fValidStosd = true;
1226 }
1227
1228 if (fValidStosd)
1229 {
1230 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1231 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1232 pgmUnlock(pVM);
1233 return rc;
1234 }
1235 }
1236
1237 /* REP prefix, don't bother. */
1238 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1239 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1240 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1241 fNotReusedNotForking = true;
1242 }
1243
1244#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1245 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1246 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1247 */
1248 if ( pPage->cModifications >= cMaxModifications
1249 && !fForcedFlush
1250# if 1
1251 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1252# else
1253 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1254# endif
1255 && ( fNotReusedNotForking
1256 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1257 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1258 )
1259 )
1260 {
1261 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1262 Assert(pPage->fDirty == false);
1263
1264 /* Flush any monitored duplicates as we will disable write protection. */
1265 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1266 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1267 {
1268 PPGMPOOLPAGE pPageHead = pPage;
1269
1270 /* Find the monitor head. */
1271 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1272 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1273
1274 while (pPageHead)
1275 {
1276 unsigned idxNext = pPageHead->iMonitoredNext;
1277
1278 if (pPageHead != pPage)
1279 {
1280 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1281 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1282 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1283 AssertRC(rc2);
1284 }
1285
1286 if (idxNext == NIL_PGMPOOL_IDX)
1287 break;
1288
1289 pPageHead = &pPool->aPages[idxNext];
1290 }
1291 }
1292
1293 /* The flushing above might fail for locked pages, so double check. */
1294 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1295 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1296 {
1297 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1298
1299 /* Temporarily allow write access to the page table again. */
1300 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1301 if (rc == VINF_SUCCESS)
1302 {
1303 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1304 AssertMsg(rc == VINF_SUCCESS
1305 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1306 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1307 || rc == VERR_PAGE_NOT_PRESENT,
1308 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1309
1310 pPage->pvDirtyFault = pvFault;
1311
1312 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1313 pgmUnlock(pVM);
1314 return rc;
1315 }
1316 }
1317 }
1318#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1319
1320 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1321flushPage:
1322 /*
1323 * Not worth it, so flush it.
1324 *
1325 * If we considered it to be reused, don't go back to ring-3
1326 * to emulate failed instructions since we usually cannot
1327 * interpret them. This may be a bit risky, in which case
1328 * the reuse detection must be fixed.
1329 */
1330 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1331 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1332 && fReused)
1333 {
1334 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1335 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1336 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1337 }
1338 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1339 pgmUnlock(pVM);
1340 return rc;
1341}
1342
1343# endif /* !IN_RING3 */
1344
1345# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1346
1347# ifdef VBOX_STRICT
1348/**
1349 * Check references to guest physical memory in a PAE / PAE page table.
1350 *
1351 * @param pPool The pool.
1352 * @param pPage The page.
1353 * @param pShwPT The shadow page table (mapping of the page).
1354 * @param pGstPT The guest page table.
1355 */
1356static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1357{
1358 unsigned cErrors = 0;
1359 int LastRc = -1; /* initialized to shut up gcc */
1360 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1361 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1362 PVM pVM = pPool->CTX_SUFF(pVM);
1363
1364#ifdef VBOX_STRICT
1365 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1366 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1367#endif
1368 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1369 {
1370 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1371 {
1372 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1373 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1374 if ( rc != VINF_SUCCESS
1375 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1376 {
1377 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1378 LastPTE = i;
1379 LastRc = rc;
1380 LastHCPhys = HCPhys;
1381 cErrors++;
1382
1383 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1384 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1385 AssertRC(rc);
1386
1387 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1388 {
1389 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1390
1391 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1392 {
1393 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1394
1395 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1396 {
1397 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1398 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1399 {
1400 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1401 }
1402 }
1403
1404 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1405 }
1406 }
1407 }
1408 }
1409 }
1410 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1411}
1412
1413/**
1414 * Check references to guest physical memory in a PAE / 32-bit page table.
1415 *
1416 * @param pPool The pool.
1417 * @param pPage The page.
1418 * @param pShwPT The shadow page table (mapping of the page).
1419 * @param pGstPT The guest page table.
1420 */
1421static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1422{
1423 unsigned cErrors = 0;
1424 int LastRc = -1; /* initialized to shut up gcc */
1425 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1426 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1427 PVM pVM = pPool->CTX_SUFF(pVM);
1428
1429#ifdef VBOX_STRICT
1430 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1431 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1432#endif
1433 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1434 {
1435 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1436 {
1437 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1438 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1439 if ( rc != VINF_SUCCESS
1440 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1441 {
1442 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1443 LastPTE = i;
1444 LastRc = rc;
1445 LastHCPhys = HCPhys;
1446 cErrors++;
1447
1448 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1449 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1450 AssertRC(rc);
1451
1452 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1453 {
1454 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1455
1456 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1457 {
1458 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1459
1460 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1461 {
1462 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1463 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1464 {
1465 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1466 }
1467 }
1468
1469 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1470 }
1471 }
1472 }
1473 }
1474 }
1475 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1476}
1477
1478# endif /* VBOX_STRICT */
1479
1480/**
1481 * Clear references to guest physical memory in a PAE / PAE page table.
1482 *
1483 * @returns nr of changed PTEs
1484 * @param pPool The pool.
1485 * @param pPage The page.
1486 * @param pShwPT The shadow page table (mapping of the page).
1487 * @param pGstPT The guest page table.
1488 * @param pOldGstPT The old cached guest page table.
1489 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1490 * @param pfFlush Flush reused page table (out)
1491 */
1492DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1493 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1494{
1495 unsigned cChanged = 0;
1496
1497#ifdef VBOX_STRICT
1498 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1499 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1500#endif
1501 *pfFlush = false;
1502
1503 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1504 {
1505 /* Check the new value written by the guest. If present and with a bogus physical address, then
1506 * it's fairly safe to assume the guest is reusing the PT.
1507 */
1508 if ( fAllowRemoval
1509 && pGstPT->a[i].n.u1Present)
1510 {
1511 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1512 {
1513 *pfFlush = true;
1514 return ++cChanged;
1515 }
1516 }
1517 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1518 {
1519 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1520 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1521 {
1522#ifdef VBOX_STRICT
1523 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1524 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1525 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1526#endif
1527 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1528 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1529 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1530 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1531
1532 if ( uHostAttr == uGuestAttr
1533 && fHostRW <= fGuestRW)
1534 continue;
1535 }
1536 cChanged++;
1537 /* Something was changed, so flush it. */
1538 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1539 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1540 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1541 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1542 }
1543 }
1544 return cChanged;
1545}
1546
1547/**
1548 * Clear references to guest physical memory in a PAE / 32-bit page table.
1549 *
1550 * @returns nr of changed PTEs
1551 * @param pPool The pool.
1552 * @param pPage The page.
1553 * @param pShwPT The shadow page table (mapping of the page).
1554 * @param pGstPT The guest page table.
1555 * @param pOldGstPT The old cached guest page table.
1556 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1557 * @param pfFlush Flush reused page table (out)
1558 */
1559DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1560 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1561{
1562 unsigned cChanged = 0;
1563
1564#ifdef VBOX_STRICT
1565 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1566 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1567#endif
1568 *pfFlush = false;
1569
1570 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1571 {
1572 /* Check the new value written by the guest. If present and with a bogus physical address, then
1573 * it's fairly safe to assume the guest is reusing the PT.
1574 */
1575 if ( fAllowRemoval
1576 && pGstPT->a[i].n.u1Present)
1577 {
1578 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1579 {
1580 *pfFlush = true;
1581 return ++cChanged;
1582 }
1583 }
1584 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1585 {
1586 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1587 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1588 {
1589#ifdef VBOX_STRICT
1590 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1591 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1592 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1593#endif
1594 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1595 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1596 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1597 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1598
1599 if ( uHostAttr == uGuestAttr
1600 && fHostRW <= fGuestRW)
1601 continue;
1602 }
1603 cChanged++;
1604 /* Something was changed, so flush it. */
1605 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1606 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1607 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1608 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1609 }
1610 }
1611 return cChanged;
1612}
1613
1614/**
1615 * Flush a dirty page
1616 *
1617 * @param pVM VM Handle.
1618 * @param pPool The pool.
1619 * @param idxSlot Dirty array slot index
1620 * @param fAllowRemoval Allow a reused page table to be removed
1621 */
1622static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1623{
1624 PPGMPOOLPAGE pPage;
1625 unsigned idxPage;
1626
1627 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1628 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1629 return;
1630
1631 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1632 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1633 pPage = &pPool->aPages[idxPage];
1634 Assert(pPage->idx == idxPage);
1635 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1636
1637 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1638 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1639
1640#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1641 PVMCPU pVCpu = VMMGetCpu(pVM);
1642 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1643#endif
1644
1645 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1646 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1647 Assert(rc == VINF_SUCCESS);
1648 pPage->fDirty = false;
1649
1650#ifdef VBOX_STRICT
1651 uint64_t fFlags = 0;
1652 RTHCPHYS HCPhys;
1653 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1654 AssertMsg( ( rc == VINF_SUCCESS
1655 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1656 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1657 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1658 || rc == VERR_PAGE_NOT_PRESENT,
1659 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1660#endif
1661
1662 /* Flush those PTEs that have changed. */
1663 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1664 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1665 void *pvGst;
1666 rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1667 bool fFlush;
1668 unsigned cChanges;
1669
1670 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1671 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1672 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1673 else
1674 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1675 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1676
1677 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1678 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1679 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1680 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1681
1682 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1683 Assert(pPage->cModifications);
1684 if (cChanges < 4)
1685 pPage->cModifications = 1; /* must use > 0 here */
1686 else
1687 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1688
1689 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1690 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1691 pPool->idxFreeDirtyPage = idxSlot;
1692
1693 pPool->cDirtyPages--;
1694 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1695 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1696 if (fFlush)
1697 {
1698 Assert(fAllowRemoval);
1699 Log(("Flush reused page table!\n"));
1700 pgmPoolFlushPage(pPool, pPage);
1701 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1702 }
1703 else
1704 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1705
1706#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1707 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1708#endif
1709}
1710
1711# ifndef IN_RING3
1712/**
1713 * Add a new dirty page
1714 *
1715 * @param pVM VM Handle.
1716 * @param pPool The pool.
1717 * @param pPage The page.
1718 */
1719void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1720{
1721 unsigned idxFree;
1722
1723 Assert(PGMIsLocked(pVM));
1724 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1725 Assert(!pPage->fDirty);
1726
1727 idxFree = pPool->idxFreeDirtyPage;
1728 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1729 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1730
1731 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1732 {
1733 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1734 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1735 }
1736 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1737 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1738
1739 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1740
1741 /*
1742 * Make a copy of the guest page table as we require valid GCPhys addresses
1743 * when removing references to physical pages.
1744 * (The HCPhys linear lookup is *extremely* expensive!)
1745 */
1746 void *pvGst;
1747 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
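/* A PAE shadow PT covers 2 MB, so for a 32-bit guest PT only the 512 4-byte
   entries backing that range (half a page) need to be saved. */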
1748 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1749#ifdef VBOX_STRICT
1750 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1751 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1752 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1753 else
1754 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1755 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1756#endif
1757 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1758
1759 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1760 pPage->fDirty = true;
1761 pPage->idxDirty = idxFree;
1762 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1763 pPool->cDirtyPages++;
1764
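/* Advance the free slot hint; if that slot is still occupied while the array
   has room left, scan the ring for any unused slot. */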
1765 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1766 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1767 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1768 {
1769 unsigned i;
1770 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1771 {
1772 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1773 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1774 {
1775 pPool->idxFreeDirtyPage = idxFree;
1776 break;
1777 }
1778 }
1779 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1780 }
1781
1782 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1783 return;
1784}
1785# endif /* !IN_RING3 */
1786
1787/**
1788 * Check if the specified page is dirty (not write monitored)
1789 *
1790 * @returns true if the page is dirty, false if not.
1791 * @param pVM VM Handle.
1792 * @param GCPhys Guest physical address
1793 */
1794bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1795{
1796 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1797 Assert(PGMIsLocked(pVM));
1798 if (!pPool->cDirtyPages)
1799 return false;
1800
1801 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1802
1803 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1804 {
1805 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1806 {
1807 PPGMPOOLPAGE pPage;
1808 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1809
1810 pPage = &pPool->aPages[idxPage];
1811 if (pPage->GCPhys == GCPhys)
1812 return true;
1813 }
1814 }
1815 return false;
1816}
1817
1818/**
1819 * Reset all dirty pages by reinstating page monitoring.
1820 *
1821 * @param pVM VM Handle.
1822 */
1823void pgmPoolResetDirtyPages(PVM pVM)
1824{
1825 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1826 Assert(PGMIsLocked(pVM));
1827 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1828
1829 if (!pPool->cDirtyPages)
1830 return;
1831
1832 Log(("pgmPoolResetDirtyPages\n"));
1833 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1834 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1835
1836 pPool->idxFreeDirtyPage = 0;
1837 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1838 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1839 {
1840 unsigned i;
1841 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1842 {
1843 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1844 {
1845 pPool->idxFreeDirtyPage = i;
1846 break;
1847 }
1848 }
1849 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1850 }
1851
1852 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1853 return;
1854}
1855
1856/**
1857 * Invalidate the PT entry for the specified page
1858 *
1859 * @param pVM VM Handle.
1860 * @param GCPtrPage Guest page to invalidate
1861 */
1862void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1863{
1864 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1865 Assert(PGMIsLocked(pVM));
1866 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1867
1868 if (!pPool->cDirtyPages)
1869 return;
1870
1871 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
1872 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1873 {
1874 }
1875}
1876
1877/**
1878 * Flush the dirty page matching the given page table address, reinstating its page monitoring.
1879 *
1880 * @param pVM VM Handle.
1881 * @param GCPhysPT Physical address of the page table
1882 */
1883void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1884{
1885 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1886 Assert(PGMIsLocked(pVM));
1887 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1888 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1889
1890 if (!pPool->cDirtyPages)
1891 return;
1892
1893 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1894
1895 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1896 {
1897 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1898 {
1899 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1900
1901 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1902 if (pPage->GCPhys == GCPhysPT)
1903 {
1904 idxDirtyPage = i;
1905 break;
1906 }
1907 }
1908 }
1909
1910 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1911 {
1912 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1913 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1914 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1915 {
1916 unsigned i;
1917 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1918 {
1919 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1920 {
1921 pPool->idxFreeDirtyPage = i;
1922 break;
1923 }
1924 }
1925 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1926 }
1927 }
1928}
1929
1930# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1931
1932/**
1933 * Inserts a page into the GCPhys hash table.
1934 *
1935 * @param pPool The pool.
1936 * @param pPage The page.
1937 */
1938DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1939{
1940 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1941 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1942 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1943 pPage->iNext = pPool->aiHash[iHash];
1944 pPool->aiHash[iHash] = pPage->idx;
1945}
1946
1947
1948/**
1949 * Removes a page from the GCPhys hash table.
1950 *
1951 * @param pPool The pool.
1952 * @param pPage The page.
1953 */
1954DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1955{
1956 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1957 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1958 if (pPool->aiHash[iHash] == pPage->idx)
1959 pPool->aiHash[iHash] = pPage->iNext;
1960 else
1961 {
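/* Not the head of the hash chain; walk the singly linked list to find the
   predecessor and unlink the page from it. */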
1962 uint16_t iPrev = pPool->aiHash[iHash];
1963 for (;;)
1964 {
1965 const int16_t i = pPool->aPages[iPrev].iNext;
1966 if (i == pPage->idx)
1967 {
1968 pPool->aPages[iPrev].iNext = pPage->iNext;
1969 break;
1970 }
1971 if (i == NIL_PGMPOOL_IDX)
1972 {
1973 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1974 break;
1975 }
1976 iPrev = i;
1977 }
1978 }
1979 pPage->iNext = NIL_PGMPOOL_IDX;
1980}
1981
1982
1983/**
1984 * Frees up one cache page.
1985 *
1986 * @returns VBox status code.
1987 * @retval VINF_SUCCESS on success.
1988 * @param pPool The pool.
1989 * @param iUser The user index.
1990 */
1991static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1992{
1993#ifndef IN_RC
1994 const PVM pVM = pPool->CTX_SUFF(pVM);
1995#endif
1996 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
1997 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1998
1999 /*
2000 * Select one page from the tail of the age list.
2001 */
2002 PPGMPOOLPAGE pPage;
2003 for (unsigned iLoop = 0; ; iLoop++)
2004 {
2005 uint16_t iToFree = pPool->iAgeTail;
2006 if (iToFree == iUser)
2007 iToFree = pPool->aPages[iToFree].iAgePrev;
2008/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2009 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2010 {
2011 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2012 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2013 {
2014 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2015 continue;
2016 iToFree = i;
2017 break;
2018 }
2019 }
2020*/
2021 Assert(iToFree != iUser);
2022 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2023 pPage = &pPool->aPages[iToFree];
2024
2025 /*
2026 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2027 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2028 */
2029 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
2030 break;
2031 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2032 pgmPoolCacheUsed(pPool, pPage);
2033 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
2034 }
2035
2036 /*
2037 * Found a usable page, flush it and return.
2038 */
2039 int rc = pgmPoolFlushPage(pPool, pPage);
2040 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2041 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2042 if (rc == VINF_SUCCESS)
2043 PGM_INVL_ALL_VCPU_TLBS(pVM);
2044 return rc;
2045}
2046
2047
2048/**
2049 * Checks if a kind mismatch is really a page being reused
2050 * or if it's just a normal remapping.
2051 *
2052 * @returns true if reused and the cached page (enmKind1) should be flushed
2053 * @returns false if not reused.
2054 * @param enmKind1 The kind of the cached page.
2055 * @param enmKind2 The kind of the requested page.
2056 */
2057static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2058{
2059 switch (enmKind1)
2060 {
2061 /*
2062 * Never reuse them. There is no remapping in non-paging mode.
2063 */
2064 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2065 case PGMPOOLKIND_32BIT_PD_PHYS:
2066 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2067 case PGMPOOLKIND_PAE_PD_PHYS:
2068 case PGMPOOLKIND_PAE_PDPT_PHYS:
2069 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2070 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2071 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2072 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2073 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2074 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2075 return false;
2076
2077 /*
2078 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2079 */
2080 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2081 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2082 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2083 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2084 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2085 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2086 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2087 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2088 case PGMPOOLKIND_32BIT_PD:
2089 case PGMPOOLKIND_PAE_PDPT:
2090 switch (enmKind2)
2091 {
2092 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2093 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2094 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2095 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2096 case PGMPOOLKIND_64BIT_PML4:
2097 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2098 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2099 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2100 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2101 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2102 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2103 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2104 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2105 return true;
2106 default:
2107 return false;
2108 }
2109
2110 /*
2111 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2112 */
2113 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2114 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2115 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2116 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2117 case PGMPOOLKIND_64BIT_PML4:
2118 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2119 switch (enmKind2)
2120 {
2121 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2122 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2123 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2124 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2125 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2126 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2127 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2128 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2129 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2130 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2131 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2132 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2133 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2134 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2135 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2136 return true;
2137 default:
2138 return false;
2139 }
2140
2141 /*
2142 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2143 */
2144 case PGMPOOLKIND_ROOT_NESTED:
2145 return false;
2146
2147 default:
2148 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2149 }
2150}
2151
2152
2153/**
2154 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2155 *
2156 * @returns VBox status code.
2157 * @retval VINF_PGM_CACHED_PAGE on success.
2158 * @retval VERR_FILE_NOT_FOUND if not found.
2159 * @param pPool The pool.
2160 * @param GCPhys The GC physical address of the page we're gonna shadow.
2161 * @param enmKind The kind of mapping.
2162 * @param enmAccess Access type for the mapping (only relevant for big pages)
2163 * @param iUser The shadow page pool index of the user table.
2164 * @param iUserTable The index into the user table (shadowed).
2165 * @param ppPage Where to store the pointer to the page.
2166 */
2167static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2168{
2169#ifndef IN_RC
2170 const PVM pVM = pPool->CTX_SUFF(pVM);
2171#endif
2172 /*
2173 * Look up the GCPhys in the hash.
2174 */
2175 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2176 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2177 if (i != NIL_PGMPOOL_IDX)
2178 {
2179 do
2180 {
2181 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2182 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2183 if (pPage->GCPhys == GCPhys)
2184 {
2185 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2186 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2187 {
2188 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2189 * doesn't flush it in case there are no more free use records.
2190 */
2191 pgmPoolCacheUsed(pPool, pPage);
2192
2193 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2194 if (RT_SUCCESS(rc))
2195 {
2196 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2197 *ppPage = pPage;
2198 if (pPage->cModifications)
2199 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2200 STAM_COUNTER_INC(&pPool->StatCacheHits);
2201 return VINF_PGM_CACHED_PAGE;
2202 }
2203 return rc;
2204 }
2205
2206 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2207 {
2208 /*
2209 * The kind is different. In some cases we should now flush the page
2210 * as it has been reused, but in most cases this is normal remapping
2211 * of PDs as PT or big pages using the GCPhys field in a slightly
2212 * different way than the other kinds.
2213 */
2214 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2215 {
2216 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2217 pgmPoolFlushPage(pPool, pPage);
2218 break;
2219 }
2220 }
2221 }
2222
2223 /* next */
2224 i = pPage->iNext;
2225 } while (i != NIL_PGMPOOL_IDX);
2226 }
2227
2228 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2229 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2230 return VERR_FILE_NOT_FOUND;
2231}
2232
2233
2234/**
2235 * Inserts a page into the cache.
2236 *
2237 * @param pPool The pool.
2238 * @param pPage The cached page.
2239 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2240 */
2241static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2242{
2243 /*
2244 * Insert into the GCPhys hash if the page is fit for that.
2245 */
2246 Assert(!pPage->fCached);
2247 if (fCanBeCached)
2248 {
2249 pPage->fCached = true;
2250 pgmPoolHashInsert(pPool, pPage);
2251 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2252 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2253 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2254 }
2255 else
2256 {
2257 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2258 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2259 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2260 }
2261
2262 /*
2263 * Insert at the head of the age list.
2264 */
2265 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2266 pPage->iAgeNext = pPool->iAgeHead;
2267 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2268 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2269 else
2270 pPool->iAgeTail = pPage->idx;
2271 pPool->iAgeHead = pPage->idx;
2272}
2273
2274
2275/**
2276 * Flushes a cached page.
2277 *
2278 * @param pPool The pool.
2279 * @param pPage The cached page.
2280 */
2281static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2282{
2283 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2284
2285 /*
2286 * Remove the page from the hash.
2287 */
2288 if (pPage->fCached)
2289 {
2290 pPage->fCached = false;
2291 pgmPoolHashRemove(pPool, pPage);
2292 }
2293 else
2294 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2295
2296 /*
2297 * Remove it from the age list.
2298 */
2299 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2300 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2301 else
2302 pPool->iAgeTail = pPage->iAgePrev;
2303 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2304 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2305 else
2306 pPool->iAgeHead = pPage->iAgeNext;
2307 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2308 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2309}
2310
2311
2312/**
2313 * Looks for pages sharing the monitor.
2314 *
2315 * @returns Pointer to the head page.
2316 * @returns NULL if not found.
2317 * @param pPool The pool.
2318 * @param pNewPage The page which is going to be monitored.
2319 */
2320static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2321{
2322 /*
2323 * Look up the GCPhys in the hash.
2324 */
2325 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2326 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2327 if (i == NIL_PGMPOOL_IDX)
2328 return NULL;
2329 do
2330 {
2331 PPGMPOOLPAGE pPage = &pPool->aPages[i];
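/* A pool page's GCPhys may carry a sub-page offset (e.g. PAE PDs shadowing
   parts of a 32-bit guest PD), so compare page frames instead of requiring
   an exact match. */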
2332 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2333 && pPage != pNewPage)
2334 {
2335 switch (pPage->enmKind)
2336 {
2337 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2338 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2339 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2340 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2341 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2342 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2343 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2344 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2345 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2346 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2347 case PGMPOOLKIND_64BIT_PML4:
2348 case PGMPOOLKIND_32BIT_PD:
2349 case PGMPOOLKIND_PAE_PDPT:
2350 {
2351 /* find the head */
2352 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2353 {
2354 Assert(pPage->iMonitoredPrev != pPage->idx);
2355 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2356 }
2357 return pPage;
2358 }
2359
2360 /* ignore, no monitoring. */
2361 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2362 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2363 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2364 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2365 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2366 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2367 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2368 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2369 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2370 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2371 case PGMPOOLKIND_ROOT_NESTED:
2372 case PGMPOOLKIND_PAE_PD_PHYS:
2373 case PGMPOOLKIND_PAE_PDPT_PHYS:
2374 case PGMPOOLKIND_32BIT_PD_PHYS:
2375 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2376 break;
2377 default:
2378 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2379 }
2380 }
2381
2382 /* next */
2383 i = pPage->iNext;
2384 } while (i != NIL_PGMPOOL_IDX);
2385 return NULL;
2386}
2387
2388
2389/**
2390 * Enables write monitoring of a guest page.
2391 *
2392 * @returns VBox status code.
2393 * @retval VINF_SUCCESS on success.
2394 * @param pPool The pool.
2395 * @param pPage The cached page.
2396 */
2397static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2398{
2399 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2400
2401 /*
2402 * Filter out the relevant kinds.
2403 */
2404 switch (pPage->enmKind)
2405 {
2406 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2407 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2408 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2409 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2410 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2411 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2412 case PGMPOOLKIND_64BIT_PML4:
2413 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2414 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2415 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2416 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2417 case PGMPOOLKIND_32BIT_PD:
2418 case PGMPOOLKIND_PAE_PDPT:
2419 break;
2420
2421 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2422 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2423 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2424 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2425 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2426 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2427 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2428 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2429 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2430 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2431 case PGMPOOLKIND_ROOT_NESTED:
2432 /* Nothing to monitor here. */
2433 return VINF_SUCCESS;
2434
2435 case PGMPOOLKIND_32BIT_PD_PHYS:
2436 case PGMPOOLKIND_PAE_PDPT_PHYS:
2437 case PGMPOOLKIND_PAE_PD_PHYS:
2438 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2439 /* Nothing to monitor here. */
2440 return VINF_SUCCESS;
2441 default:
2442 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2443 }
2444
2445 /*
2446 * Install handler.
2447 */
2448 int rc;
2449 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2450 if (pPageHead)
2451 {
2452 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2453 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2454
2455#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2456 if (pPageHead->fDirty)
2457 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2458#endif
2459
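/* Another pool page already monitors this guest page; link this page into
   the monitored chain right after the head instead of registering a second
   physical access handler. */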
2460 pPage->iMonitoredPrev = pPageHead->idx;
2461 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2462 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2463 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2464 pPageHead->iMonitoredNext = pPage->idx;
2465 rc = VINF_SUCCESS;
2466 }
2467 else
2468 {
2469 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2470 PVM pVM = pPool->CTX_SUFF(pVM);
2471 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2472 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2473 GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK,
2474 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2475 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2476 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2477 pPool->pszAccessHandler);
2478 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2479 * the heap size should suffice. */
2480 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2481 PVMCPU pVCpu = VMMGetCpu(pVM);
2482 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2483 }
2484 pPage->fMonitored = true;
2485 return rc;
2486}
2487
2488
2489/**
2490 * Disables write monitoring of a guest page.
2491 *
2492 * @returns VBox status code.
2493 * @retval VINF_SUCCESS on success.
2494 * @param pPool The pool.
2495 * @param pPage The cached page.
2496 */
2497static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2498{
2499 /*
2500 * Filter out the relevant kinds.
2501 */
2502 switch (pPage->enmKind)
2503 {
2504 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2505 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2506 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2507 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2508 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2509 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2510 case PGMPOOLKIND_64BIT_PML4:
2511 case PGMPOOLKIND_32BIT_PD:
2512 case PGMPOOLKIND_PAE_PDPT:
2513 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2514 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2515 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2516 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2517 break;
2518
2519 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2520 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2521 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2522 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2523 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2524 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2525 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2526 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2527 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2528 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2529 case PGMPOOLKIND_ROOT_NESTED:
2530 case PGMPOOLKIND_PAE_PD_PHYS:
2531 case PGMPOOLKIND_PAE_PDPT_PHYS:
2532 case PGMPOOLKIND_32BIT_PD_PHYS:
2533 /* Nothing to monitor here. */
2534 Assert(!pPage->fMonitored);
2535 return VINF_SUCCESS;
2536
2537 default:
2538 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2539 }
2540 Assert(pPage->fMonitored);
2541
2542 /*
2543 * Remove the page from the monitored list or uninstall it if last.
2544 */
2545 const PVM pVM = pPool->CTX_SUFF(pVM);
2546 int rc;
2547 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2548 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2549 {
2550 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2551 {
2552 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2553 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2554 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2555 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2556 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2557 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2558 pPool->pszAccessHandler);
2559 AssertFatalRCSuccess(rc);
2560 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2561 }
2562 else
2563 {
2564 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2565 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2566 {
2567 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2568 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2569 }
2570 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2571 rc = VINF_SUCCESS;
2572 }
2573 }
2574 else
2575 {
2576 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2577 AssertFatalRC(rc);
2578 PVMCPU pVCpu = VMMGetCpu(pVM);
2579 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2580 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2581 }
2582 pPage->fMonitored = false;
2583
2584 /*
2585 * Remove it from the list of modified pages (if in it).
2586 */
2587 pgmPoolMonitorModifiedRemove(pPool, pPage);
2588
2589 return rc;
2590}
2591
2592
2593/**
2594 * Inserts the page into the list of modified pages.
2595 *
2596 * @param pPool The pool.
2597 * @param pPage The page.
2598 */
2599void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2600{
2601 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2602 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2603 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2604 && pPool->iModifiedHead != pPage->idx,
2605 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2606 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2607 pPool->iModifiedHead, pPool->cModifiedPages));
2608
2609 pPage->iModifiedNext = pPool->iModifiedHead;
2610 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2611 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2612 pPool->iModifiedHead = pPage->idx;
2613 pPool->cModifiedPages++;
2614#ifdef VBOX_WITH_STATISTICS
2615 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2616 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2617#endif
2618}
2619
2620
2621/**
2622 * Removes the page from the list of modified pages and resets the
2623 * modification counter.
2624 *
2625 * @param pPool The pool.
2626 * @param pPage The page which is believed to be in the list of modified pages.
2627 */
2628static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2629{
2630 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2631 if (pPool->iModifiedHead == pPage->idx)
2632 {
2633 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2634 pPool->iModifiedHead = pPage->iModifiedNext;
2635 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2636 {
2637 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2638 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2639 }
2640 pPool->cModifiedPages--;
2641 }
2642 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2643 {
2644 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2645 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2646 {
2647 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2648 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2649 }
2650 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2651 pPool->cModifiedPages--;
2652 }
2653 else
2654 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2655 pPage->cModifications = 0;
2656}
2657
2658
2659/**
2660 * Zaps the list of modified pages, resetting their modification counters in the process.
2661 *
2662 * @param pVM The VM handle.
2663 */
2664static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2665{
2666 pgmLock(pVM);
2667 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2668 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2669
2670 unsigned cPages = 0; NOREF(cPages);
2671
2672#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2673 pgmPoolResetDirtyPages(pVM);
2674#endif
2675
2676 uint16_t idx = pPool->iModifiedHead;
2677 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2678 while (idx != NIL_PGMPOOL_IDX)
2679 {
2680 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2681 idx = pPage->iModifiedNext;
2682 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2683 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2684 pPage->cModifications = 0;
2685 Assert(++cPages);
2686 }
2687 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2688 pPool->cModifiedPages = 0;
2689 pgmUnlock(pVM);
2690}
2691
2692
2693/**
2694 * Handle SyncCR3 pool tasks
2695 *
2696 * @returns VBox status code.
2697 * @retval VINF_SUCCESS on success.
2698 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2699 * @param pVCpu The VMCPU handle.
2700 * @remark Should only be used when monitoring is available, thus placed in
2701 * the PGMPOOL_WITH_MONITORING #ifdef.
2702 */
2703int pgmPoolSyncCR3(PVMCPU pVCpu)
2704{
2705 PVM pVM = pVCpu->CTX_SUFF(pVM);
2706 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2707
2708 /*
2709 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2710 * Occasionally we will have to clear all the shadow page tables because we wanted
2711 * to monitor a page which was mapped by too many shadowed page tables. This operation
2712 * is sometimes referred to as a 'lightweight flush'.
2713 */
2714# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2715 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2716 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2717# else /* !IN_RING3 */
2718 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2719 {
2720 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2721 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2722
2723 /* Make sure all other VCPUs return to ring 3. */
2724 if (pVM->cCpus > 1)
2725 {
2726 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2727 PGM_INVL_ALL_VCPU_TLBS(pVM);
2728 }
2729 return VINF_PGM_SYNC_CR3;
2730 }
2731# endif /* !IN_RING3 */
2732 else
2733 {
2734 pgmPoolMonitorModifiedClearAll(pVM);
2735
2736 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2737 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2738 {
2739 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2740 return pgmPoolSyncCR3(pVCpu);
2741 }
2742 }
2743 return VINF_SUCCESS;
2744}
2745
2746
2747/**
2748 * Frees up at least one user entry.
2749 *
2750 * @returns VBox status code.
2751 * @retval VINF_SUCCESS if successfully added.
2752 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2753 * @param pPool The pool.
2754 * @param iUser The user index.
2755 */
2756static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2757{
2758 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2759 /*
2760 * Just free cached pages in a braindead fashion.
2761 */
2762 /** @todo walk the age list backwards and free the first with usage. */
2763 int rc = VINF_SUCCESS;
2764 do
2765 {
2766 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2767 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2768 rc = rc2;
2769 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2770 return rc;
2771}
2772
2773
2774/**
2775 * Inserts a page into the cache.
2776 *
2777 * This will create user node for the page, insert it into the GCPhys
2778 * hash, and insert it into the age list.
2779 *
2780 * @returns VBox status code.
2781 * @retval VINF_SUCCESS if successfully added.
2782 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2783 * @param pPool The pool.
2784 * @param pPage The cached page.
2785 * @param GCPhys The GC physical address of the page we're gonna shadow.
2786 * @param iUser The user index.
2787 * @param iUserTable The user table index.
2788 */
2789DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2790{
2791 int rc = VINF_SUCCESS;
2792 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2793
2794 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2795
2796#ifdef VBOX_STRICT
2797 /*
2798 * Check that the entry doesn't already exist.
2799 */
2800 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2801 {
2802 uint16_t i = pPage->iUserHead;
2803 do
2804 {
2805 Assert(i < pPool->cMaxUsers);
2806 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2807 i = paUsers[i].iNext;
2808 } while (i != NIL_PGMPOOL_USER_INDEX);
2809 }
2810#endif
2811
2812 /*
2813 * Find a free user node.
2814 */
2815 uint16_t i = pPool->iUserFreeHead;
2816 if (i == NIL_PGMPOOL_USER_INDEX)
2817 {
2818 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2819 if (RT_FAILURE(rc))
2820 return rc;
2821 i = pPool->iUserFreeHead;
2822 }
2823
2824 /*
2825 * Unlink the user node from the free list,
2826 * initialize and insert it into the user list.
2827 */
2828 pPool->iUserFreeHead = paUsers[i].iNext;
2829 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2830 paUsers[i].iUser = iUser;
2831 paUsers[i].iUserTable = iUserTable;
2832 pPage->iUserHead = i;
2833
2834 /*
2835 * Insert into cache and enable monitoring of the guest page if enabled.
2836 *
2837 * Until we implement caching of all levels, including the CR3 one, we'll
2838 * have to make sure we don't try monitor & cache any recursive reuse of
2839 * a monitored CR3 page. Because all windows versions are doing this we'll
2840 * have to be able to do combined access monitoring, CR3 + PT and
2841 * PD + PT (guest PAE).
2842 *
2843 * Update:
2844 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2845 */
2846 const bool fCanBeMonitored = true;
2847 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2848 if (fCanBeMonitored)
2849 {
2850 rc = pgmPoolMonitorInsert(pPool, pPage);
2851 AssertRC(rc);
2852 }
2853 return rc;
2854}
2855
2856
2857/**
2858 * Adds a user reference to a page.
2859 *
2860 * This will move the page to the head of the age list.
2861 *
2862 * @returns VBox status code.
2863 * @retval VINF_SUCCESS if successfully added.
2864 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2865 * @param pPool The pool.
2866 * @param pPage The cached page.
2867 * @param iUser The user index.
2868 * @param iUserTable The user table.
2869 */
2870static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2871{
2872 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2873
2874 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2875
2876# ifdef VBOX_STRICT
2877 /*
2878 * Check that the entry doesn't already exist. We only allow multiple
2879 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2880 */
2881 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2882 {
2883 uint16_t i = pPage->iUserHead;
2884 do
2885 {
2886 Assert(i < pPool->cMaxUsers);
2887 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2888 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2889 i = paUsers[i].iNext;
2890 } while (i != NIL_PGMPOOL_USER_INDEX);
2891 }
2892# endif
2893
2894 /*
2895 * Allocate a user node.
2896 */
2897 uint16_t i = pPool->iUserFreeHead;
2898 if (i == NIL_PGMPOOL_USER_INDEX)
2899 {
2900 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2901 if (RT_FAILURE(rc))
2902 return rc;
2903 i = pPool->iUserFreeHead;
2904 }
2905 pPool->iUserFreeHead = paUsers[i].iNext;
2906
2907 /*
2908 * Initialize the user node and insert it.
2909 */
2910 paUsers[i].iNext = pPage->iUserHead;
2911 paUsers[i].iUser = iUser;
2912 paUsers[i].iUserTable = iUserTable;
2913 pPage->iUserHead = i;
2914
2915# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2916 if (pPage->fDirty)
2917 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2918# endif
2919
2920 /*
2921 * Tell the cache to update its replacement stats for this page.
2922 */
2923 pgmPoolCacheUsed(pPool, pPage);
2924 return VINF_SUCCESS;
2925}
2926
2927
2928/**
2929 * Frees a user record associated with a page.
2930 *
2931 * This does not clear the entry in the user table; it simply returns the
2932 * user record to the chain of free records.
2933 *
2934 * @param pPool The pool.
2935 * @param pPage The shadow page.
2936 * @param iUser The shadow page pool index of the user table.
2937 * @param iUserTable The index into the user table (shadowed).
2938 */
2939static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2940{
2941 /*
2942 * Unlink and free the specified user entry.
2943 */
2944 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2945
2946 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2947 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2948 uint16_t i = pPage->iUserHead;
2949 if ( i != NIL_PGMPOOL_USER_INDEX
2950 && paUsers[i].iUser == iUser
2951 && paUsers[i].iUserTable == iUserTable)
2952 {
2953 pPage->iUserHead = paUsers[i].iNext;
2954
2955 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2956 paUsers[i].iNext = pPool->iUserFreeHead;
2957 pPool->iUserFreeHead = i;
2958 return;
2959 }
2960
2961 /* General: Linear search. */
2962 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2963 while (i != NIL_PGMPOOL_USER_INDEX)
2964 {
2965 if ( paUsers[i].iUser == iUser
2966 && paUsers[i].iUserTable == iUserTable)
2967 {
2968 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2969 paUsers[iPrev].iNext = paUsers[i].iNext;
2970 else
2971 pPage->iUserHead = paUsers[i].iNext;
2972
2973 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2974 paUsers[i].iNext = pPool->iUserFreeHead;
2975 pPool->iUserFreeHead = i;
2976 return;
2977 }
2978 iPrev = i;
2979 i = paUsers[i].iNext;
2980 }
2981
2982 /* Fatal: didn't find it */
2983 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
2984 iUser, iUserTable, pPage->GCPhys));
2985}
2986
2987
2988/**
2989 * Gets the entry size of a shadow table.
2990 *
2991 * @param enmKind The kind of page.
2992 *
2993 * @returns The size of the entry in bytes. That is, 4 or 8.
2994 * @returns If the kind is not for a table, an assertion is raised and 0 is
2995 * returned.
2996 */
2997DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2998{
2999 switch (enmKind)
3000 {
3001 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3002 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3003 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3004 case PGMPOOLKIND_32BIT_PD:
3005 case PGMPOOLKIND_32BIT_PD_PHYS:
3006 return 4;
3007
3008 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3009 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3010 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3011 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3012 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3013 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3014 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3015 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3016 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3017 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3018 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3019 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3020 case PGMPOOLKIND_64BIT_PML4:
3021 case PGMPOOLKIND_PAE_PDPT:
3022 case PGMPOOLKIND_ROOT_NESTED:
3023 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3024 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3025 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3026 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3027 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3028 case PGMPOOLKIND_PAE_PD_PHYS:
3029 case PGMPOOLKIND_PAE_PDPT_PHYS:
3030 return 8;
3031
3032 default:
3033 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3034 }
3035}
3036
3037
3038/**
3039 * Gets the entry size of a guest table.
3040 *
3041 * @param enmKind The kind of page.
3042 *
3043 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3044 * @returns If the kind is not for a table, an assertion is raised and 0 is
3045 * returned.
3046 */
3047DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3048{
3049 switch (enmKind)
3050 {
3051 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3052 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3053 case PGMPOOLKIND_32BIT_PD:
3054 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3055 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3056 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3057 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3058 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3059 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3060 return 4;
3061
3062 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3063 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3064 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3065 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3066 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3067 case PGMPOOLKIND_64BIT_PML4:
3068 case PGMPOOLKIND_PAE_PDPT:
3069 return 8;
3070
3071 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3072 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3073 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3074 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3075 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3076 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3077 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3078 case PGMPOOLKIND_ROOT_NESTED:
3079 case PGMPOOLKIND_PAE_PD_PHYS:
3080 case PGMPOOLKIND_PAE_PDPT_PHYS:
3081 case PGMPOOLKIND_32BIT_PD_PHYS:
3082 /** @todo can we return 0? (nobody is calling this...) */
3083 AssertFailed();
3084 return 0;
3085
3086 default:
3087 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3088 }
3089}
3090
3091
3092/**
3093 * Checks one shadow page table entry for a mapping of a physical page.
3094 *
3095 * @returns true / false indicating removal of all relevant PTEs
3096 *
3097 * @param pVM The VM handle.
3098 * @param pPhysPage The guest page in question.
3099 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3100 * @param iShw The shadow page table.
3101 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3102 */
3103static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3104{
3105 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3106 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3107 bool fRet = false;
3108
3109 /*
3110 * Assert sanity.
3111 */
3112 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3113 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3114 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3115
3116 /*
3117 * Then, clear the actual mappings to the page in the shadow PT.
3118 */
3119 switch (pPage->enmKind)
3120 {
3121 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3122 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3123 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3124 {
3125 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3126 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3127 uint32_t u32AndMask = 0;
3128 uint32_t u32OrMask = 0;
3129
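/* If the caller permits keeping the entry, only bring the RW bit in line with
   the current physical handler state; in all other cases the masks stay zero,
   so the entry is cleared below and the present counters are updated. */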
3130 if (!fFlushPTEs)
3131 {
3132 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3133 {
3134 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3135 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3136 u32OrMask = X86_PTE_RW;
3137 u32AndMask = UINT32_MAX;
3138 fRet = true;
3139 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3140 break;
3141
3142 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3143 u32OrMask = 0;
3144 u32AndMask = ~X86_PTE_RW;
3145 fRet = true;
3146 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3147 break;
3148 default:
3149 /* (shouldn't be here, will assert below) */
3150 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3151 break;
3152 }
3153 }
3154 else
3155 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3156
3157 /* Update the counter if we're removing references. */
3158 if (!u32AndMask)
3159 {
3160 Assert(pPage->cPresent );
3161 Assert(pPool->cPresent);
3162 pPage->cPresent--;
3163 pPool->cPresent--;
3164 }
3165
3166 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3167 {
3168 X86PTE Pte;
3169
3170 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3171 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3172 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3173 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3174
3175 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3176 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3177 return fRet;
3178 }
3179#ifdef LOG_ENABLED
3180 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3181 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3182 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3183 {
3184 Log(("i=%d cFound=%d\n", i, ++cFound));
3185 }
3186#endif
3187 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3188 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3189 break;
3190 }
3191
3192 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3193 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3194 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3195 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3196 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3197 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3198 {
3199 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3200 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3201 uint64_t u64OrMask = 0;
3202 uint64_t u64AndMask = 0;
3203
3204 if (!fFlushPTEs)
3205 {
3206 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3207 {
3208 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3209 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3210 u64OrMask = X86_PTE_RW;
3211 u64AndMask = UINT64_MAX;
3212 fRet = true;
3213 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3214 break;
3215
3216 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3217 u64OrMask = 0;
3218 u64AndMask = ~(uint64_t)X86_PTE_RW;
3219 fRet = true;
3220 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3221 break;
3222
3223 default:
3224 /* (shouldn't be here, will assert below) */
3225 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3226 break;
3227 }
3228 }
3229 else
3230 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3231
3232 /* Update the counter if we're removing references. */
3233 if (!u64AndMask)
3234 {
3235 Assert(pPage->cPresent);
3236 Assert(pPool->cPresent);
3237 pPage->cPresent--;
3238 pPool->cPresent--;
3239 }
3240
3241 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3242 {
3243 X86PTEPAE Pte;
3244
3245 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3246 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3247 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3248 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3249
3250 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3251 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3252 return fRet;
3253 }
3254#ifdef LOG_ENABLED
3255 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3256 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3257 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3258 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3259 Log(("i=%d cFound=%d\n", i, ++cFound));
3260#endif
3261 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3262 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3263 break;
3264 }
3265
3266#ifdef PGM_WITH_LARGE_PAGES
3267 /* Large page case only. */
3268 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3269 {
3270 Assert(pVM->pgm.s.fNestedPaging);
3271
3272 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3273 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3274
3275 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3276 {
3277 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3278 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3279 pPD->a[iPte].u = 0;
3280 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3281
3282 /* Update the counter as we're removing references. */
3283 Assert(pPage->cPresent);
3284 Assert(pPool->cPresent);
3285 pPage->cPresent--;
3286 pPool->cPresent--;
3287
3288 return fRet;
3289 }
3290# ifdef LOG_ENABLED
3291 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3292 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3293 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3294 Log(("i=%d cFound=%d\n", i, ++cFound));
3295# endif
3296 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3297 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3298 break;
3299 }
3300
3301 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3302 case PGMPOOLKIND_PAE_PD_PHYS:
3303 {
3304 Assert(pVM->pgm.s.fNestedPaging);
3305
3306 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3307 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3308
3309 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3310 {
3311 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3312 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3313 pPD->a[iPte].u = 0;
3314 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3315
3316 /* Update the counter as we're removing references. */
3317 Assert(pPage->cPresent);
3318 Assert(pPool->cPresent);
3319 pPage->cPresent--;
3320 pPool->cPresent--;
3321 return fRet;
3322 }
3323# ifdef LOG_ENABLED
3324 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3325 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3326 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3327 Log(("i=%d cFound=%d\n", i, ++cFound));
3328# endif
3329 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3330 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3331 break;
3332 }
3333#endif /* PGM_WITH_LARGE_PAGES */
3334
3335 default:
3336 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3337 }
3338 return fRet;
3339}
3340
3341
3342/**
3343 * Scans one shadow page table for mappings of a physical page.
3344 *
3345 * @param pVM The VM handle.
3346 * @param pPhysPage The guest page in question.
3347 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3348 * @param iShw The shadow page table.
3349 */
3350static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3351{
3352 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3353
3354 /* We should only come here when there's only one reference to this physical page. */
3355 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3356
3357 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3358 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3359 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3360 if (!fKeptPTEs)
3361 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3362 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3363}
3364
3365
3366/**
3367 * Flushes a list of shadow page tables mapping the same physical page.
3368 *
3369 * @param pVM The VM handle.
3370 * @param pPhysPage The guest page in question.
3371 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3372 * @param iPhysExt The physical cross reference extent list to flush.
3373 */
3374static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3375{
3376 Assert(PGMIsLockOwner(pVM));
3377 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3378 bool fKeepList = false;
3379
3380 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3381 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3382
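/* Walk the phys ext list; every non-NIL slot names a shadow page table and the
   PTE index within it that maps this physical page. */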
3383 const uint16_t iPhysExtStart = iPhysExt;
3384 PPGMPOOLPHYSEXT pPhysExt;
3385 do
3386 {
3387 Assert(iPhysExt < pPool->cMaxPhysExts);
3388 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3389 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3390 {
3391 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3392 {
3393 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3394 if (!fKeptPTEs)
3395 {
3396 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3397 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3398 }
3399 else
3400 fKeepList = true;
3401 }
3402 }
3403 /* next */
3404 iPhysExt = pPhysExt->iNext;
3405 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3406
3407 if (!fKeepList)
3408 {
3409 /* insert the list into the free list and clear the ram range entry. */
3410 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3411 pPool->iPhysExtFreeHead = iPhysExtStart;
3412 /* Invalidate the tracking data. */
3413 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3414 }
3415
3416 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3417}
3418
3419
3420/**
3421 * Flushes all shadow page table mappings of the given guest page.
3422 *
3423 * This is typically called when the host page backing the guest one has been
3424 * replaced or when the page protection was changed due to a guest access
3425 * caught by the monitoring.
3426 *
3427 * @returns VBox status code.
3428 * @retval VINF_SUCCESS if all references have been successfully cleared.
3429 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3430 * pool cleaning. FF and sync flags are set.
3431 *
3432 * @param pVM The VM handle.
3433 * @param GCPhysPage GC physical address of the page in question
3434 * @param pPhysPage The guest page in question.
3435 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3436 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3437 * flushed, it is NOT touched if this isn't necessary.
3438 * The caller MUST initialized this to @a false.
3439 */
3440int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3441{
3442 PVMCPU pVCpu = VMMGetCpu(pVM);
3443 pgmLock(pVM);
3444 int rc = VINF_SUCCESS;
3445
3446#ifdef PGM_WITH_LARGE_PAGES
3447 /* Is this page part of a large page? */
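/* A page inside an active 2 MB mapping cannot be tracked on its own, so the
   large page is marked disabled (to be broken up) and the update is redone on
   the base page, which holds the only PDE reference. */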
3448 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3449 {
3450 PPGMPAGE pPhysBase;
3451 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3452
3453 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3454
3455 /* Fetch the large page base. */
3456 if (GCPhysBase != GCPhysPage)
3457 {
3458 pPhysBase = pgmPhysGetPage(&pVM->pgm.s, GCPhysBase);
3459 AssertFatal(pPhysBase);
3460 }
3461 else
3462 pPhysBase = pPhysPage;
3463
3464 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3465
3466 if (PGM_PAGE_GET_PDE_TYPE(pPhysBase) == PGM_PAGE_PDE_TYPE_PDE)
3467 {
3468 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3469 PGM_PAGE_SET_PDE_TYPE(pPhysBase, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3470
3471 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3472 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pPhysBase, fFlushPTEs, pfFlushTLBs);
3473
3474 *pfFlushTLBs = true;
3475 pgmUnlock(pVM);
3476 return rc;
3477 }
3478 }
3479#else
3480 NOREF(GCPhysPage);
3481#endif /* PGM_WITH_LARGE_PAGES */
3482
3483 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3484 if (u16)
3485 {
3486 /*
3487 * The zero page is currently screwing up the tracking and we'll
3488 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3489 * is defined, zero pages won't normally be mapped. Some kind of solution
3490 * will be needed for this problem of course, but it will have to wait...
3491 */
3492 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3493 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3494 rc = VINF_PGM_GCPHYS_ALIASED;
3495 else
3496 {
3497# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3498 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3499 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3500 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3501# endif
3502
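/* Three tracking cases: a single reference stores the shadow PT index directly
   in the tracking word, multiple references go through a phys ext list, and the
   overflow marker means the references are unknown, requiring a slow full scan. */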
3503 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3504 {
3505 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3506 pgmPoolTrackFlushGCPhysPT(pVM,
3507 pPhysPage,
3508 fFlushPTEs,
3509 PGMPOOL_TD_GET_IDX(u16));
3510 }
3511 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3512 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3513 else
3514 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3515 *pfFlushTLBs = true;
3516
3517# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3518 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3519# endif
3520 }
3521 }
3522
3523 if (rc == VINF_PGM_GCPHYS_ALIASED)
3524 {
3525 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3526 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3527 rc = VINF_PGM_SYNC_CR3;
3528 }
3529 pgmUnlock(pVM);
3530 return rc;
3531}
3532
3533
3534/**
3535 * Scans all shadow page tables for mappings of a physical page.
3536 *
3537 * This may be slow, but it's most likely more efficient than cleaning
3538 * out the entire page pool / cache.
3539 *
3540 * @returns VBox status code.
3541 * @retval VINF_SUCCESS if all references have been successfully cleared.
3542 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3543 * a page pool cleaning.
3544 *
3545 * @param pVM The VM handle.
3546 * @param pPhysPage The guest page in question.
3547 */
3548int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3549{
3550 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3551 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3552 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3553 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3554
3555 /*
3556 * There is a limit to what makes sense.
3557 */
3558 if ( pPool->cPresent > 1024
3559 && pVM->cCpus == 1)
3560 {
3561 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3562 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3563 return VINF_PGM_GCPHYS_ALIASED;
3564 }
3565
3566 /*
3567 * Iterate all the pages until we've encountered all that in use.
3568 * This is a simple but not quite optimal solution.
3569 */
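/* cLeft (initialised from cUsedPages) lets the outer loop stop early, and the
   per-page copy of cPresent lets the inner loop stop once every present PTE in
   that page table has been visited. */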
3570 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3571 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3572 unsigned cLeft = pPool->cUsedPages;
3573 unsigned iPage = pPool->cCurPages;
3574 while (--iPage >= PGMPOOL_IDX_FIRST)
3575 {
3576 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3577 if ( pPage->GCPhys != NIL_RTGCPHYS
3578 && pPage->cPresent)
3579 {
3580 switch (pPage->enmKind)
3581 {
3582 /*
3583 * We only care about shadow page tables.
3584 */
3585 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3586 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3587 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3588 {
3589 unsigned cPresent = pPage->cPresent;
3590 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3591 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3592 if (pPT->a[i].n.u1Present)
3593 {
3594 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3595 {
3596 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3597 pPT->a[i].u = 0;
3598
3599 /* Update the counter as we're removing references. */
3600 Assert(pPage->cPresent);
3601 Assert(pPool->cPresent);
3602 pPage->cPresent--;
3603 pPool->cPresent--;
3604 }
3605 if (!--cPresent)
3606 break;
3607 }
3608 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3609 break;
3610 }
3611
3612 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3613 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3614 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3615 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3616 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3617 {
3618 unsigned cPresent = pPage->cPresent;
3619 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3620 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3621 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3622 {
3623 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3624 {
3625 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3626 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3627
3628 /* Update the counter as we're removing references. */
3629 Assert(pPage->cPresent);
3630 Assert(pPool->cPresent);
3631 pPage->cPresent--;
3632 pPool->cPresent--;
3633 }
3634 if (!--cPresent)
3635 break;
3636 }
3637 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3638 break;
3639 }
3640#ifndef IN_RC
3641 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3642 {
3643 unsigned cPresent = pPage->cPresent;
3644 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3645 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3646 if (pPT->a[i].n.u1Present)
3647 {
3648 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3649 {
3650 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3651 pPT->a[i].u = 0;
3652
3653 /* Update the counter as we're removing references. */
3654 Assert(pPage->cPresent);
3655 Assert(pPool->cPresent);
3656 pPage->cPresent--;
3657 pPool->cPresent--;
3658 }
3659 if (!--cPresent)
3660 break;
3661 }
3662 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3663 break;
3664 }
3665#endif
3666 }
3667 if (!--cLeft)
3668 break;
3669 }
3670 }
3671
3672 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3673 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3674
3675 /*
3676 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3677 */
3678 if (pPool->cPresent > 1024)
3679 {
3680 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3681 return VINF_PGM_GCPHYS_ALIASED;
3682 }
3683
3684 return VINF_SUCCESS;
3685}
3686
3687
3688/**
3689 * Clears the user entry in a user table.
3690 *
3691 * This is used to remove all references to a page when flushing it.
3692 */
3693static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3694{
3695 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3696 Assert(pUser->iUser < pPool->cCurPages);
3697 uint32_t iUserTable = pUser->iUserTable;
3698
3699 /*
3700 * Map the user page.
3701 */
3702 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3703 union
3704 {
3705 uint64_t *pau64;
3706 uint32_t *pau32;
3707 } u;
3708 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3709
3710 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3711
3712 /* Safety precaution in case we change the paging for other modes too in the future. */
3713 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3714
3715#ifdef VBOX_STRICT
3716 /*
3717 * Some sanity checks.
3718 */
3719 switch (pUserPage->enmKind)
3720 {
3721 case PGMPOOLKIND_32BIT_PD:
3722 case PGMPOOLKIND_32BIT_PD_PHYS:
3723 Assert(iUserTable < X86_PG_ENTRIES);
3724 break;
3725 case PGMPOOLKIND_PAE_PDPT:
3726 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3727 case PGMPOOLKIND_PAE_PDPT_PHYS:
3728 Assert(iUserTable < 4);
3729 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3730 break;
3731 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3732 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3733 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3734 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3735 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3736 case PGMPOOLKIND_PAE_PD_PHYS:
3737 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3738 break;
3739 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3740 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3741 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3742 break;
3743 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3744 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3745 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3746 break;
3747 case PGMPOOLKIND_64BIT_PML4:
3748 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3749 /* GCPhys >> PAGE_SHIFT is the index here */
3750 break;
3751 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3752 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3753 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3754 break;
3755
3756 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3757 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3758 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3759 break;
3760
3761 case PGMPOOLKIND_ROOT_NESTED:
3762 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3763 break;
3764
3765 default:
3766 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3767 break;
3768 }
3769#endif /* VBOX_STRICT */
3770
3771 /*
3772 * Clear the entry in the user page.
3773 */
3774 switch (pUserPage->enmKind)
3775 {
3776 /* 32-bit entries */
3777 case PGMPOOLKIND_32BIT_PD:
3778 case PGMPOOLKIND_32BIT_PD_PHYS:
3779 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3780 break;
3781
3782 /* 64-bit entries */
3783 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3784 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3785 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3786 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3787 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3788#ifdef IN_RC
3789 /*
3790 * In 32-bit PAE mode we *must* invalidate the TLB when changing a
3791 * PDPT entry; the CPU fetches them only during cr3 load, so any
3792 * non-present PDPT will continue to cause page faults.
3793 */
3794 ASMReloadCR3();
3795 /* no break */
3796#endif
3797 case PGMPOOLKIND_PAE_PD_PHYS:
3798 case PGMPOOLKIND_PAE_PDPT_PHYS:
3799 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3800 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3801 case PGMPOOLKIND_64BIT_PML4:
3802 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3803 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3804 case PGMPOOLKIND_PAE_PDPT:
3805 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3806 case PGMPOOLKIND_ROOT_NESTED:
3807 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3808 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3809 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3810 break;
3811
3812 default:
3813 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3814 }
3815 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3816}
3817
3818
3819/**
3820 * Clears all users of a page.
3821 */
3822static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3823{
3824 /*
3825 * Free all the user records.
3826 */
3827 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3828
3829 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3830 uint16_t i = pPage->iUserHead;
3831 while (i != NIL_PGMPOOL_USER_INDEX)
3832 {
3833 /* Clear the entry in the user table. */
3834 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3835
3836 /* Free it. */
3837 const uint16_t iNext = paUsers[i].iNext;
3838 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3839 paUsers[i].iNext = pPool->iUserFreeHead;
3840 pPool->iUserFreeHead = i;
3841
3842 /* Next. */
3843 i = iNext;
3844 }
3845 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3846}
3847
3848
3849/**
3850 * Allocates a new physical cross reference extent.
3851 *
3852 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3853 * @param pVM The VM handle.
3854 * @param piPhysExt Where to store the phys ext index.
3855 */
3856PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3857{
3858 Assert(PGMIsLockOwner(pVM));
3859 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3860 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3861 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3862 {
3863 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3864 return NULL;
3865 }
3866 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3867 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3868 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3869 *piPhysExt = iPhysExt;
3870 return pPhysExt;
3871}
3872
3873
3874/**
3875 * Frees a physical cross reference extent.
3876 *
3877 * @param pVM The VM handle.
3878 * @param iPhysExt The extent to free.
3879 */
3880void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3881{
3882 Assert(PGMIsLockOwner(pVM));
3883 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3884 Assert(iPhysExt < pPool->cMaxPhysExts);
3885 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3886 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3887 {
3888 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3889 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3890 }
3891 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3892 pPool->iPhysExtFreeHead = iPhysExt;
3893}
3894
3895
3896/**
3897 * Frees a list of physical cross reference extents.
3898 *
3899 * @param pVM The VM handle.
3900 * @param iPhysExt The index of the head of the extent list to free.
3901 */
3902void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3903{
3904 Assert(PGMIsLockOwner(pVM));
3905 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3906
3907 const uint16_t iPhysExtStart = iPhysExt;
3908 PPGMPOOLPHYSEXT pPhysExt;
3909 do
3910 {
3911 Assert(iPhysExt < pPool->cMaxPhysExts);
3912 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3913 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3914 {
3915 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3916 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3917 }
3918
3919 /* next */
3920 iPhysExt = pPhysExt->iNext;
3921 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3922
3923 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3924 pPool->iPhysExtFreeHead = iPhysExtStart;
3925}
3926
3927
3928/**
3929 * Inserts a reference into a list of physical cross reference extents.
3930 *
3931 * @returns The new tracking data for PGMPAGE.
3932 *
3933 * @param pVM The VM handle.
3934 * @param iPhysExt The physical extent index of the list head.
3935 * @param iShwPT The shadow page table index.
3936 * @param iPte Page table entry
3937 *
3938 */
3939static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3940{
3941 Assert(PGMIsLockOwner(pVM));
3942 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3943 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3944
3945 /*
3946 * Special common cases.
3947 */
3948 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3949 {
3950 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3951 paPhysExts[iPhysExt].apte[1] = iPte;
3952 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3953 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3954 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3955 }
3956 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3957 {
3958 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3959 paPhysExts[iPhysExt].apte[2] = iPte;
3960 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3961 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3962 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3963 }
3964 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3965
3966 /*
3967 * General treatment.
3968 */
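/* The chain walk is capped by cMax (15 extents); past that the whole list is
   returned to the free list and the page is marked overflowed, which later
   forces the slow scan path. */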
3969 const uint16_t iPhysExtStart = iPhysExt;
3970 unsigned cMax = 15;
3971 for (;;)
3972 {
3973 Assert(iPhysExt < pPool->cMaxPhysExts);
3974 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3975 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3976 {
3977 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3978 paPhysExts[iPhysExt].apte[i] = iPte;
3979 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3980 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3981 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3982 }
3983 if (!--cMax)
3984 {
3985 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
3986 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3987 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3988 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3989 }
3990
3991 /* advance */
3992 iPhysExt = paPhysExts[iPhysExt].iNext;
3993 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3994 break;
3995 }
3996
3997 /*
3998 * Add another extent to the list.
3999 */
4000 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4001 if (!pNew)
4002 {
4003 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4004 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4005 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4006 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4007 }
4008 pNew->iNext = iPhysExtStart;
4009 pNew->aidx[0] = iShwPT;
4010 pNew->apte[0] = iPte;
4011 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4012 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4013}
4014
4015
4016/**
4017 * Adds a reference to a guest physical page where extents are in use.
4018 *
4019 * @returns The new tracking data for PGMPAGE.
4020 *
4021 * @param pVM The VM handle.
4022 * @param pPhysPage Pointer to the aPages entry in the ram range.
4023 * @param u16 The ram range flags (top 16-bits).
4024 * @param iShwPT The shadow page table index.
4025 * @param iPte Page table entry
4026 */
4027uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4028{
4029 pgmLock(pVM);
4030 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4031 {
4032 /*
4033 * Convert to extent list.
4034 */
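/* The single existing reference (pool index + PTE index from the tracking word)
   moves into slot 0 of a freshly allocated extent and the new reference goes
   into slot 1; if no extent is available the page is marked overflowed. */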
4035 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4036 uint16_t iPhysExt;
4037 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4038 if (pPhysExt)
4039 {
4040 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4041 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4042 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4043 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4044 pPhysExt->aidx[1] = iShwPT;
4045 pPhysExt->apte[1] = iPte;
4046 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4047 }
4048 else
4049 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4050 }
4051 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4052 {
4053 /*
4054 * Insert into the extent list.
4055 */
4056 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4057 }
4058 else
4059 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4060 pgmUnlock(pVM);
4061 return u16;
4062}
4063
4064
4065/**
4066 * Clear references to guest physical memory.
4067 *
4068 * @param pPool The pool.
4069 * @param pPage The page.
4070 * @param pPhysPage Pointer to the aPages entry in the ram range.
4071 * @param iPte Shadow PTE index
4072 */
4073void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4074{
4075 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4076 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4077
4078 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4079 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4080 {
4081 PVM pVM = pPool->CTX_SUFF(pVM);
4082 pgmLock(pVM);
4083
4084 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4085 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4086 do
4087 {
4088 Assert(iPhysExt < pPool->cMaxPhysExts);
4089
4090 /*
4091 * Look for the shadow page and check if it's all freed.
4092 */
4093 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4094 {
4095 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4096 && paPhysExts[iPhysExt].apte[i] == iPte)
4097 {
4098 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4099 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4100
4101 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4102 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4103 {
4104 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4105 pgmUnlock(pVM);
4106 return;
4107 }
4108
4109 /* we can free the node. */
4110 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4111 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4112 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4113 {
4114 /* lonely node */
4115 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4116 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4117 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
4118 }
4119 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4120 {
4121 /* head */
4122 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4123 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4124 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4125 }
4126 else
4127 {
4128 /* in list */
4129 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4130 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4131 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4132 }
4133 iPhysExt = iPhysExtNext;
4134 pgmUnlock(pVM);
4135 return;
4136 }
4137 }
4138
4139 /* next */
4140 iPhysExtPrev = iPhysExt;
4141 iPhysExt = paPhysExts[iPhysExt].iNext;
4142 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4143
4144 pgmUnlock(pVM);
4145 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4146 }
4147 else /* nothing to do */
4148 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4149}
4150
4151/**
4152 * Clear references to guest physical memory.
4153 *
4154 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
4155 * is assumed to be correct, so the linear search can be skipped and we can assert
4156 * at an earlier point.
4157 *
4158 * @param pPool The pool.
4159 * @param pPage The page.
4160 * @param HCPhys The host physical address corresponding to the guest page.
4161 * @param GCPhys The guest physical address corresponding to HCPhys.
4162 * @param iPte Shadow PTE index
4163 */
4164static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4165{
4166 /*
4167 * Walk range list.
4168 */
4169 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4170 while (pRam)
4171 {
4172 RTGCPHYS off = GCPhys - pRam->GCPhys;
4173 if (off < pRam->cb)
4174 {
4175 /* does it match? */
4176 const unsigned iPage = off >> PAGE_SHIFT;
4177 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4178#ifdef LOG_ENABLED
4179 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
4180 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4181#endif
4182 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4183 {
4184 Assert(pPage->cPresent);
4185 Assert(pPool->cPresent);
4186 pPage->cPresent--;
4187 pPool->cPresent--;
4188 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4189 return;
4190 }
4191 break;
4192 }
4193 pRam = pRam->CTX_SUFF(pNext);
4194 }
4195 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4196}
4197
4198
4199/**
4200 * Clear references to guest physical memory.
4201 *
4202 * @param pPool The pool.
4203 * @param pPage The page.
4204 * @param HCPhys The host physical address corresponding to the guest page.
4205 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4206 * @param iPte Shadow pte index
4207 */
4208void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4209{
4210 RTHCPHYS HCPhysExpected = 0xDEADBEEFDEADBEEFULL;
4211
4212 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4213
4214 /*
4215 * Walk range list.
4216 */
4217 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4218 while (pRam)
4219 {
4220 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
4221 if (off < pRam->cb)
4222 {
4223 /* does it match? */
4224 const unsigned iPage = off >> PAGE_SHIFT;
4225 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4226 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4227 {
4228 Assert(pPage->cPresent);
4229 Assert(pPool->cPresent);
4230 pPage->cPresent--;
4231 pPool->cPresent--;
4232 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4233 return;
4234 }
4235 HCPhysExpected = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
4236 break;
4237 }
4238 pRam = pRam->CTX_SUFF(pNext);
4239 }
4240
4241 /*
4242 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4243 */
4244 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4245 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4246 while (pRam)
4247 {
4248 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4249 while (iPage-- > 0)
4250 {
4251 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4252 {
4253 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4254 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4255 Assert(pPage->cPresent);
4256 Assert(pPool->cPresent);
4257 pPage->cPresent--;
4258 pPool->cPresent--;
4259 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4260 return;
4261 }
4262 }
4263 pRam = pRam->CTX_SUFF(pNext);
4264 }
4265
4266 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Expected HCPhys with hint = %RHp)\n", HCPhys, GCPhysHint, HCPhysExpected));
4267}
4268
4269
4270/**
4271 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4272 *
4273 * @param pPool The pool.
4274 * @param pPage The page.
4275 * @param pShwPT The shadow page table (mapping of the page).
4276 * @param pGstPT The guest page table.
4277 */
4278DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4279{
4280 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4281 {
4282 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4283 if (pShwPT->a[i].n.u1Present)
4284 {
4285 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4286 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4287 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4288 if (!pPage->cPresent)
4289 break;
4290 }
4291 }
4292}
4293
4294
4295/**
4296 * Clear references to guest physical memory in a PAE / 32-bit page table.
4297 *
4298 * @param pPool The pool.
4299 * @param pPage The page.
4300 * @param pShwPT The shadow page table (mapping of the page).
4301 * @param pGstPT The guest page table (just a half one).
4302 */
4303DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4304{
4305 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4306 {
4307 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4308 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4309 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4310 {
4311 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4312 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4313 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4314 if (!pPage->cPresent)
4315 break;
4316 }
4317 }
4318}
4319
4320
4321/**
4322 * Clear references to guest physical memory in a PAE / PAE page table.
4323 *
4324 * @param pPool The pool.
4325 * @param pPage The page.
4326 * @param pShwPT The shadow page table (mapping of the page).
4327 * @param pGstPT The guest page table.
4328 */
4329DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4330{
4331 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4332 {
4333 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4334 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4335 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4336 {
4337 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4338 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4339 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
4340 if (!pPage->cPresent)
4341 break;
4342 }
4343 }
4344}
4345
4346
4347/**
4348 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4349 *
4350 * @param pPool The pool.
4351 * @param pPage The page.
4352 * @param pShwPT The shadow page table (mapping of the page).
4353 */
4354DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4355{
4356 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4357 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4358 {
4359 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4360 if (pShwPT->a[i].n.u1Present)
4361 {
4362 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4363 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4364 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys, i);
4365 if (!pPage->cPresent)
4366 break;
4367 }
4368 }
4369}
4370
4371
4372/**
4373 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4374 *
4375 * @param pPool The pool.
4376 * @param pPage The page.
4377 * @param pShwPT The shadow page table (mapping of the page).
4378 */
4379DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4380{
4381 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4382 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4383 {
4384 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4385 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4386 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4387 {
4388 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4389 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4390 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys, i);
4391 if (!pPage->cPresent)
4392 break;
4393 }
4394 }
4395}
4396
4397
4398/**
4399 * Clear references to shadowed pages in an EPT page table.
4400 *
4401 * @param pPool The pool.
4402 * @param pPage The page.
4403 * @param pShwPT The shadow page table (mapping of the page).
4404 */
4405DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4406{
4407 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4408 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4409 {
4410 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4411 if (pShwPT->a[i].n.u1Present)
4412 {
4413 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4414 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4415 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys, i);
4416 if (!pPage->cPresent)
4417 break;
4418 }
4419 }
4420}
4421
4422
4423
4424/**
4425 * Clear references to shadowed pages in a 32-bit page directory.
4426 *
4427 * @param pPool The pool.
4428 * @param pPage The page.
4429 * @param pShwPD The shadow page directory (mapping of the page).
4430 */
4431DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4432{
4433 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4434 {
4435 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4436 if ( pShwPD->a[i].n.u1Present
4437 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4438 )
4439 {
4440 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4441 if (pSubPage)
4442 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4443 else
4444 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4445 }
4446 }
4447}
4448
4449/**
4450 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4451 *
4452 * @param pPool The pool.
4453 * @param pPage The page.
4454 * @param pShwPD The shadow page directory (mapping of the page).
4455 */
4456DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4457{
4458 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4459 {
4460 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4461 if ( pShwPD->a[i].n.u1Present
4462 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4463 {
4464#ifdef PGM_WITH_LARGE_PAGES
4465 if (pShwPD->a[i].b.u1Size)
4466 {
4467 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4468 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4469 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4470 }
4471 else
4472#endif
4473 {
4474 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4475 if (pSubPage)
4476 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4477 else
4478 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4479 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4480 }
4481 }
4482 }
4483}
4484
4485/**
4486 * Clear references to shadowed pages in a PAE page directory pointer table.
4487 *
4488 * @param pPool The pool.
4489 * @param pPage The page.
4490 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4491 */
4492DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4493{
4494 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4495 {
4496 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4497 if ( pShwPDPT->a[i].n.u1Present
4498 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4499 )
4500 {
4501 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4502 if (pSubPage)
4503 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4504 else
4505 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4506 }
4507 }
4508}
4509
4510
4511/**
4512 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4513 *
4514 * @param pPool The pool.
4515 * @param pPage The page.
4516 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4517 */
4518DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4519{
4520 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4521 {
4522 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4523 if (pShwPDPT->a[i].n.u1Present)
4524 {
4525 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4526 if (pSubPage)
4527 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4528 else
4529 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4530 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4531 }
4532 }
4533}
4534
4535
4536/**
4537 * Clear references to shadowed pages in a 64-bit level 4 page table.
4538 *
4539 * @param pPool The pool.
4540 * @param pPage The page.
4541 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4542 */
4543DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4544{
4545 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4546 {
4547 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4548 if (pShwPML4->a[i].n.u1Present)
4549 {
4550 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4551 if (pSubPage)
4552 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4553 else
4554 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4555 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4556 }
4557 }
4558}
4559
4560
4561/**
4562 * Clear references to shadowed pages in an EPT page directory.
4563 *
4564 * @param pPool The pool.
4565 * @param pPage The page.
4566 * @param pShwPD The shadow page directory (mapping of the page).
4567 */
4568DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4569{
4570 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4571 {
4572 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4573 if (pShwPD->a[i].n.u1Present)
4574 {
4575#ifdef PGM_WITH_LARGE_PAGES
4576 if (pShwPD->a[i].b.u1Size)
4577 {
4578 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4579 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4580 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4581 }
4582 else
4583#endif
4584 {
4585 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4586 if (pSubPage)
4587 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4588 else
4589 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4590 }
4591 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4592 }
4593 }
4594}
4595
4596
4597/**
4598 * Clear references to shadowed pages in an EPT page directory pointer table.
4599 *
4600 * @param pPool The pool.
4601 * @param pPage The page.
4602 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4603 */
4604DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4605{
4606 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4607 {
4608 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4609 if (pShwPDPT->a[i].n.u1Present)
4610 {
4611 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4612 if (pSubPage)
4613 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4614 else
4615 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4616 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4617 }
4618 }
4619}
4620
4621
4622/**
4623 * Clears all references made by this page.
4624 *
4625 * This includes other shadow pages and GC physical addresses.
4626 *
4627 * @param pPool The pool.
4628 * @param pPage The page.
4629 */
4630static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4631{
4632 /*
4633 * Map the shadow page and take action according to the page kind.
4634 */
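/* Page table kinds shadowing a guest PT also map the guest table so its entries
   can serve as GC physical hints; the 2/4 MB and phys variants derive the
   addresses from pPage->GCPhys instead. */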
4635 PVM pVM = pPool->CTX_SUFF(pVM);
4636 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4637 switch (pPage->enmKind)
4638 {
4639 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4640 {
4641 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4642 void *pvGst;
4643 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4644 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4645 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4646 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4647 break;
4648 }
4649
4650 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4651 {
4652 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4653 void *pvGst;
4654 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4655 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4656 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4657 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4658 break;
4659 }
4660
4661 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4662 {
4663 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4664 void *pvGst;
4665 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4666 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4667 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4668 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4669 break;
4670 }
4671
4672 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4673 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4674 {
4675 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4676 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4677 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4678 break;
4679 }
4680
4681 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4682 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4683 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4684 {
4685 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4686 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4687 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4688 break;
4689 }
4690
4691 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4692 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4693 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4694 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4695 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4696 case PGMPOOLKIND_PAE_PD_PHYS:
4697 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4698 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4699 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4700 break;
4701
4702 case PGMPOOLKIND_32BIT_PD_PHYS:
4703 case PGMPOOLKIND_32BIT_PD:
4704 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4705 break;
4706
4707 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4708 case PGMPOOLKIND_PAE_PDPT:
4709 case PGMPOOLKIND_PAE_PDPT_PHYS:
4710 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4711 break;
4712
4713 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4714 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4715 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4716 break;
4717
4718 case PGMPOOLKIND_64BIT_PML4:
4719 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4720 break;
4721
4722 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4723 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4724 break;
4725
4726 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4727 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4728 break;
4729
4730 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4731 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4732 break;
4733
4734 default:
4735 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4736 }
4737
4738 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4739 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4740 ASMMemZeroPage(pvShw);
4741 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4742 pPage->fZeroed = true;
4743 Assert(!pPage->cPresent);
4744 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4745}
4746
4747/**
4748 * Flushes a pool page.
4749 *
4750 * This moves the page to the free list after removing all user references to it.
4751 *
4752 * @returns VBox status code.
4753 * @retval VINF_SUCCESS on success.
4754 * @param pPool The pool.
4755 * @param pPage The shadow page to flush.
4756 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4757 */
4758int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4759{
4760 PVM pVM = pPool->CTX_SUFF(pVM);
4761 bool fFlushRequired = false;
4762
4763 int rc = VINF_SUCCESS;
4764 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4765 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4766 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4767
4768 /*
4769 * Quietly reject any attempts at flushing any of the special root pages.
4770 */
4771 if (pPage->idx < PGMPOOL_IDX_FIRST)
4772 {
4773 AssertFailed(); /* can no longer happen */
4774 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4775 return VINF_SUCCESS;
4776 }
4777
4778 pgmLock(pVM);
4779
4780 /*
4781 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4782 */
4783 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4784 {
4785 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4786 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4787 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4788 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4789 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4790 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4791 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4792 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4793 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4794 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4795 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4796 pgmUnlock(pVM);
4797 return VINF_SUCCESS;
4798 }
4799
4800#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4801 /* Start a subset so we won't run out of mapping space. */
4802 PVMCPU pVCpu = VMMGetCpu(pVM);
4803 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4804#endif
4805
4806 /*
4807 * Mark the page as being in need of an ASMMemZeroPage().
4808 */
4809 pPage->fZeroed = false;
4810
4811#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4812 if (pPage->fDirty)
4813 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4814#endif
4815
4816 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4817 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4818 fFlushRequired = true;
4819
4820 /*
4821 * Clear the page.
4822 */
4823 pgmPoolTrackClearPageUsers(pPool, pPage);
4824 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4825 pgmPoolTrackDeref(pPool, pPage);
4826 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4827
4828 /*
4829 * Flush it from the cache.
4830 */
4831 pgmPoolCacheFlushPage(pPool, pPage);
4832
4833#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4834 /* Heavy stuff done. */
4835 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4836#endif
4837
4838 /*
4839 * Deregister the monitoring.
4840 */
4841 if (pPage->fMonitored)
4842 rc = pgmPoolMonitorFlush(pPool, pPage);
4843
4844 /*
4845 * Free the page.
4846 */
4847 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4848 pPage->iNext = pPool->iFreeHead;
4849 pPool->iFreeHead = pPage->idx;
4850 pPage->enmKind = PGMPOOLKIND_FREE;
4851 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4852 pPage->GCPhys = NIL_RTGCPHYS;
4853 pPage->fReusedFlushPending = false;
4854
4855 pPool->cUsedPages--;
4856
4857 /* Flush the TLBs of all VCPUs if required. */
4858 if ( fFlushRequired
4859 && fFlush)
4860 {
4861 PGM_INVL_ALL_VCPU_TLBS(pVM);
4862 }
4863
4864 pgmUnlock(pVM);
4865 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4866 return rc;
4867}
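/*
 * Illustrative sketch only (not part of the pool code): one way a caller could
 * use the fFlush parameter documented above to batch several page flushes and
 * issue a single combined TLB shoot-down afterwards.  pPage1 and pPage2 are
 * hypothetical placeholders.
 */
#if 0
    pgmPoolFlushPage(pPool, pPage1, false /*fFlush*/);   /* skip the per-page TLB flush */
    pgmPoolFlushPage(pPool, pPage2, false /*fFlush*/);
    PGM_INVL_ALL_VCPU_TLBS(pVM);                          /* one shoot-down covers the whole batch */
#endif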
4868
4869
4870/**
4871 * Frees a usage of a pool page.
4872 *
4873 * The caller is responsible for updating the user table so that it no longer
4874 * references the shadow page.
4875 *
4876 * @param pPool The pool.
4877 * @param pPage The shadow page whose usage is being freed.
4878 * @param iUser The shadow page pool index of the user table.
4879 * @param iUserTable The index into the user table (shadowed).
4880 */
4881void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4882{
4883 PVM pVM = pPool->CTX_SUFF(pVM);
4884
4885 STAM_PROFILE_START(&pPool->StatFree, a);
4886 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4887 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4888 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4889 pgmLock(pVM);
4890 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4891 if (!pPage->fCached)
4892 pgmPoolFlushPage(pPool, pPage);
4893 pgmUnlock(pVM);
4894 STAM_PROFILE_STOP(&pPool->StatFree, a);
4895}
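/*
 * Illustrative sketch only: per the note above, the user table entry must stop
 * referencing the shadow page before the usage is released.  pShwPde,
 * pShwPdePage and iPdShw are hypothetical placeholders.
 */
#if 0
    PPGMPOOLPAGE pSubPage = pgmPoolGetPage(pPool, pShwPde->u & X86_PDE_PAE_PG_MASK);
    pShwPde->u = 0;                                                /* 1. drop the reference in the user table */
    pgmPoolFreeByPage(pPool, pSubPage, pShwPdePage->idx, iPdShw);  /* 2. release this usage of the shadow PT */
#endif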
4896
4897
4898/**
4899 * Makes more free pages available, either by growing the pool or by freeing a cached page.
4900 *
4901 * @returns VBox status code.
4902 * @retval VINF_SUCCESS on success.
4903 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4904 *
4905 * @param pPool The pool.
4906 * @param enmKind Page table kind
4907 * @param iUser The user of the page.
4908 */
4909static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4910{
4911 PVM pVM = pPool->CTX_SUFF(pVM);
4912
4913 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%d\n", iUser));
4914
4915 /*
4916 * If the pool isn't fully grown yet, expand it.
4917 */
4918 if ( pPool->cCurPages < pPool->cMaxPages
4919#if defined(IN_RC)
4920 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4921 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4922 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4923#endif
4924 )
4925 {
4926 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4927#ifdef IN_RING3
4928 int rc = PGMR3PoolGrow(pVM);
4929#else
4930 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4931#endif
4932 if (RT_FAILURE(rc))
4933 return rc;
4934 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4935 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4936 return VINF_SUCCESS;
4937 }
4938
4939 /*
4940 * Free one cached page.
4941 */
4942 return pgmPoolCacheFreeOne(pPool, iUser);
4943}
4944
4945/**
4946 * Allocates a page from the pool.
4947 *
4948 * This page may actually be a cached page and not in need of any processing
4949 * on the caller's part.
4950 *
4951 * @returns VBox status code.
4952 * @retval VINF_SUCCESS if a NEW page was allocated.
4953 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4954 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4955 * @param pVM The VM handle.
4956 * @param GCPhys The GC physical address of the page we're going to shadow.
4957 * For 4MB and 2MB PD entries, it's the first address the
4958 * shadow PT is covering.
4959 * @param enmKind The kind of mapping.
4960 * @param enmAccess Access type for the mapping (only relevant for big pages)
4961 * @param iUser The shadow page pool index of the user table.
4962 * @param iUserTable The index into the user table (shadowed).
4963 * @param fLockPage Lock the page
4964 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4965 */
4966int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable,
4967 bool fLockPage, PPPGMPOOLPAGE ppPage)
4968{
4969 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4970 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4971 LogFlow(("pgmPoolAllocEx: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4972 *ppPage = NULL;
4973 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4974 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4975 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4976
4977 pgmLock(pVM);
4978
4979 if (pPool->fCacheEnabled)
4980 {
4981 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4982 if (RT_SUCCESS(rc2))
4983 {
4984 if (fLockPage)
4985 pgmPoolLockPage(pPool, *ppPage);
4986 pgmUnlock(pVM);
4987 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4988 LogFlow(("pgmPoolAllocEx: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4989 return rc2;
4990 }
4991 }
4992
4993 /*
4994 * Allocate a new one.
4995 */
4996 int rc = VINF_SUCCESS;
4997 uint16_t iNew = pPool->iFreeHead;
4998 if (iNew == NIL_PGMPOOL_IDX)
4999 {
5000 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5001 if (RT_FAILURE(rc))
5002 {
5003 pgmUnlock(pVM);
5004 Log(("pgmPoolAllocEx: returns %Rrc (Free)\n", rc));
5005 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5006 return rc;
5007 }
5008 iNew = pPool->iFreeHead;
5009 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
5010 }
5011
5012 /* unlink the free head */
5013 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5014 pPool->iFreeHead = pPage->iNext;
5015 pPage->iNext = NIL_PGMPOOL_IDX;
5016
5017 /*
5018 * Initialize it.
5019 */
5020 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5021 pPage->enmKind = enmKind;
5022 pPage->enmAccess = enmAccess;
5023 pPage->GCPhys = GCPhys;
5024 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5025 pPage->fMonitored = false;
5026 pPage->fCached = false;
5027#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5028 pPage->fDirty = false;
5029#endif
5030 pPage->fReusedFlushPending = false;
5031 pPage->cModifications = 0;
5032 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5033 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5034 pPage->cLocked = 0;
5035 pPage->cPresent = 0;
5036 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5037 pPage->pvLastAccessHandlerFault = 0;
5038 pPage->cLastAccessHandlerCount = 0;
5039 pPage->pvLastAccessHandlerRip = 0;
5040
5041 /*
5042 * Insert into the tracking and cache. If this fails, free the page.
5043 */
5044 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5045 if (RT_FAILURE(rc3))
5046 {
5047 pPool->cUsedPages--;
5048 pPage->enmKind = PGMPOOLKIND_FREE;
5049 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5050 pPage->GCPhys = NIL_RTGCPHYS;
5051 pPage->iNext = pPool->iFreeHead;
5052 pPool->iFreeHead = pPage->idx;
5053 pgmUnlock(pVM);
5054 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5055 Log(("pgmPoolAllocEx: returns %Rrc (Insert)\n", rc3));
5056 return rc3;
5057 }
5058
5059 /*
5060 * Commit the allocation, clear the page and return.
5061 */
5062#ifdef VBOX_WITH_STATISTICS
5063 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5064 pPool->cUsedPagesHigh = pPool->cUsedPages;
5065#endif
5066
5067 if (!pPage->fZeroed)
5068 {
5069 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5070 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5071 ASMMemZeroPage(pv);
5072 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5073 }
5074
5075 *ppPage = pPage;
5076 if (fLockPage)
5077 pgmPoolLockPage(pPool, pPage);
5078 pgmUnlock(pVM);
5079 LogFlow(("pgmPoolAllocEx: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5080 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5081 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5082 return rc;
5083}
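/*
 * Illustrative sketch only: how a caller might tell the cached and freshly
 * allocated cases apart, as described by the status codes above.  GCPhysPT,
 * pShwPdePage and iPdShw are hypothetical placeholders.
 */
#if 0
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAllocEx(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
                            pShwPdePage->idx, iPdShw, false /*fLockPage*/, &pShwPage);
    if (rc == VINF_PGM_CACHED_PAGE)
    {
        /* The page already holds valid shadow entries; no SyncPT needed. */
    }
    else if (RT_SUCCESS(rc))
    {
        /* A zeroed page was returned; the caller now fills in the shadow entries. */
    }
#endif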
5084
5085
5086/**
5087 * Frees a usage of a pool page.
5088 *
5089 * @param pVM The VM handle.
5090 * @param HCPhys The HC physical address of the shadow page.
5091 * @param iUser The shadow page pool index of the user table.
5092 * @param iUserTable The index into the user table (shadowed).
5093 */
5094void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5095{
5096 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5097 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5098 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5099}
5100
5101/**
5102 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5103 *
5104 * @returns Pointer to the shadow page structure.
5105 * @param pPool The pool.
5106 * @param HCPhys The HC physical address of the shadow page.
5107 */
5108PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5109{
5110 PVM pVM = pPool->CTX_SUFF(pVM);
5111
5112 Assert(PGMIsLockOwner(pVM));
5113
5114 /*
5115 * Look up the page.
5116 */
5117 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5118
5119 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5120 return pPage;
5121}
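/*
 * Illustrative sketch only: since the lookup above masks the key with
 * X86_PTE_PAE_PG_MASK, a caller may pass a raw shadow PDE/PTE value without
 * first stripping the attribute bits.  uShwPde is a hypothetical placeholder.
 */
#if 0
    PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, uShwPde);   /* low flag bits are ignored by the lookup */
#endif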
5122
5123
5124/**
5125 * Internal worker for finding a page for debugging purposes, no assertions.
5126 *
5127 * @returns Pointer to the shadow page structure. NULL if not found.
5128 * @param pPool The pool.
5129 * @param HCPhys The HC physical address of the shadow page.
5130 */
5131PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5132{
5133 PVM pVM = pPool->CTX_SUFF(pVM);
5134 Assert(PGMIsLockOwner(pVM));
5135 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5136}
5137
5138
5139#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5140/**
5141 * Flush the specified page if present
5142 *
5143 * @param pVM The VM handle.
5144 * @param GCPhys Guest physical address of the page to flush
5145 */
5146void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5147{
5148 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5149
5150 VM_ASSERT_EMT(pVM);
5151
5152 /*
5153 * Look up the GCPhys in the hash.
5154 */
5155 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5156 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5157 if (i == NIL_PGMPOOL_IDX)
5158 return;
5159
5160 do
5161 {
5162 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5163 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5164 {
5165 switch (pPage->enmKind)
5166 {
5167 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5168 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5169 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5170 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5171 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5172 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5173 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5174 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5175 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5176 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5177 case PGMPOOLKIND_64BIT_PML4:
5178 case PGMPOOLKIND_32BIT_PD:
5179 case PGMPOOLKIND_PAE_PDPT:
5180 {
5181 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5182#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5183 if (pPage->fDirty)
5184 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5185 else
5186#endif
5187 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5188 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
5189 pgmPoolMonitorChainFlush(pPool, pPage);
5190 return;
5191 }
5192
5193 /* ignore, no monitoring. */
5194 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5195 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5196 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5197 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5198 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5199 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5200 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5201 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5202 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5203 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5204 case PGMPOOLKIND_ROOT_NESTED:
5205 case PGMPOOLKIND_PAE_PD_PHYS:
5206 case PGMPOOLKIND_PAE_PDPT_PHYS:
5207 case PGMPOOLKIND_32BIT_PD_PHYS:
5208 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5209 break;
5210
5211 default:
5212 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5213 }
5214 }
5215
5216 /* next */
5217 i = pPage->iNext;
5218 } while (i != NIL_PGMPOOL_IDX);
5219 return;
5220}
5221#endif /* IN_RING3 */
5222
5223#ifdef IN_RING3
5224
5225
5226/**
5227 * Reset CPU on hot plugging.
5228 *
5229 * @param pVM The VM handle.
5230 * @param pVCpu The virtual CPU.
5231 */
5232void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5233{
5234 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5235
5236 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5237 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5238 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5239}
5240
5241
5242/**
5243 * Flushes the entire cache.
5244 *
5245 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5246 * this and will execute the CR3 flush.
5247 *
5248 * @param pVM The VM handle.
5249 */
5250void pgmR3PoolReset(PVM pVM)
5251{
5252 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5253
5254 Assert(PGMIsLockOwner(pVM));
5255 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5256 LogFlow(("pgmR3PoolReset:\n"));
5257
5258 /*
5259 * If there are no pages in the pool, there is nothing to do.
5260 */
5261 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5262 {
5263 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5264 return;
5265 }
5266
5267 /*
5268 * Exit the shadow mode since we're going to clear everything,
5269 * including the root page.
5270 */
5271 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5272 {
5273 PVMCPU pVCpu = &pVM->aCpus[i];
5274 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5275 }
5276
5277 /*
5278 * Nuke the free list and reinsert all pages into it.
5279 */
5280 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5281 {
5282 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5283
5284 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5285 if (pPage->fMonitored)
5286 pgmPoolMonitorFlush(pPool, pPage);
5287 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5288 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5289 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5290 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5291 pPage->cModifications = 0;
5292 pPage->GCPhys = NIL_RTGCPHYS;
5293 pPage->enmKind = PGMPOOLKIND_FREE;
5294 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5295 Assert(pPage->idx == i);
5296 pPage->iNext = i + 1;
5297 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5298 pPage->fSeenNonGlobal = false;
5299 pPage->fMonitored = false;
5300#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5301 pPage->fDirty = false;
5302#endif
5303 pPage->fCached = false;
5304 pPage->fReusedFlushPending = false;
5305 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5306 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5307 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5308 pPage->cLocked = 0;
5309 }
5310 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5311 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5312 pPool->cUsedPages = 0;
5313
5314 /*
5315 * Zap and reinitialize the user records.
5316 */
5317 pPool->cPresent = 0;
5318 pPool->iUserFreeHead = 0;
5319 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5320 const unsigned cMaxUsers = pPool->cMaxUsers;
5321 for (unsigned i = 0; i < cMaxUsers; i++)
5322 {
5323 paUsers[i].iNext = i + 1;
5324 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5325 paUsers[i].iUserTable = 0xfffffffe;
5326 }
5327 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5328
5329 /*
5330 * Clear all the GCPhys links and rebuild the phys ext free list.
5331 */
5332 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
5333 pRam;
5334 pRam = pRam->CTX_SUFF(pNext))
5335 {
5336 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5337 while (iPage-- > 0)
5338 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
5339 }
5340
5341 pPool->iPhysExtFreeHead = 0;
5342 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5343 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5344 for (unsigned i = 0; i < cMaxPhysExts; i++)
5345 {
5346 paPhysExts[i].iNext = i + 1;
5347 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5348 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5349 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5350 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5351 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5352 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5353 }
5354 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5355
5356 /*
5357 * Just zap the modified list.
5358 */
5359 pPool->cModifiedPages = 0;
5360 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5361
5362 /*
5363 * Clear the GCPhys hash and the age list.
5364 */
5365 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5366 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5367 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5368 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5369
5370#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5371 /* Clear all dirty pages. */
5372 pPool->idxFreeDirtyPage = 0;
5373 pPool->cDirtyPages = 0;
5374 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5375 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5376#endif
5377
5378 /*
5379 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5380 */
5381 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5382 {
5383 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5384 pPage->iNext = NIL_PGMPOOL_IDX;
5385 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5386 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5387 pPage->cModifications = 0;
5388 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5389 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5390 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5391 if (pPage->fMonitored)
5392 {
5393 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
5394 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5395 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5396 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5397 pPool->pszAccessHandler);
5398 AssertFatalRCSuccess(rc);
5399 pgmPoolHashInsert(pPool, pPage);
5400 }
5401 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5402 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5403 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5404 }
5405
5406 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5407 {
5408 /*
5409 * Re-enter the shadowing mode and assert Sync CR3 FF.
5410 */
5411 PVMCPU pVCpu = &pVM->aCpus[i];
5412 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5413 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5414 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5415 }
5416
5417 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5418}
5419#endif /* IN_RING3 */
5420
5421#ifdef LOG_ENABLED
5422static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5423{
5424 switch(enmKind)
5425 {
5426 case PGMPOOLKIND_INVALID:
5427 return "PGMPOOLKIND_INVALID";
5428 case PGMPOOLKIND_FREE:
5429 return "PGMPOOLKIND_FREE";
5430 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5431 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5432 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5433 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5434 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5435 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5436 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5437 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5438 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5439 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5440 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5441 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5442 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5443 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5444 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5445 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5446 case PGMPOOLKIND_32BIT_PD:
5447 return "PGMPOOLKIND_32BIT_PD";
5448 case PGMPOOLKIND_32BIT_PD_PHYS:
5449 return "PGMPOOLKIND_32BIT_PD_PHYS";
5450 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5451 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5452 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5453 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5454 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5455 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5456 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5457 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5458 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5459 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5460 case PGMPOOLKIND_PAE_PD_PHYS:
5461 return "PGMPOOLKIND_PAE_PD_PHYS";
5462 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5463 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5464 case PGMPOOLKIND_PAE_PDPT:
5465 return "PGMPOOLKIND_PAE_PDPT";
5466 case PGMPOOLKIND_PAE_PDPT_PHYS:
5467 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5468 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5469 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5470 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5471 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5472 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5473 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5474 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5475 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5476 case PGMPOOLKIND_64BIT_PML4:
5477 return "PGMPOOLKIND_64BIT_PML4";
5478 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5479 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5480 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5481 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5482 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5483 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5484 case PGMPOOLKIND_ROOT_NESTED:
5485 return "PGMPOOLKIND_ROOT_NESTED";
5486 }
5487 return "Unknown kind!";
5488}
5489 #endif /* LOG_ENABLED */