VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 41451

Last change on this file since 41451 was 41386, checked in by vboxsync, 13 years ago

VMM/PGM: minor doxygen consistency.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 208.0 KB
1/* $Id: PGMAllPool.cpp 41386 2012-05-22 09:04:47Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hwacc_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
51static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#ifndef IN_RING3
53DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
54#endif
55#ifdef LOG_ENABLED
56static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
57#endif
58#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
59static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
60#endif
61
62int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
63PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
64void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
65void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
66
67RT_C_DECLS_END
68
69
70/**
71 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
72 *
73 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
74 * @param enmKind The page kind.
75 */
76DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
77{
78 switch (enmKind)
79 {
80 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
83 return true;
84 default:
85 return false;
86 }
87}
88
89
90/**
91 * Flushes a chain of pages sharing the same access monitor.
92 *
93 * @returns VBox status code suitable for scheduling.
94 * @param pPool The pool.
95 * @param pPage A page in the chain.
96 * @todo VBOXSTRICTRC
97 */
98int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
99{
100 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
101
102 /*
103 * Find the list head.
104 */
105 uint16_t idx = pPage->idx;
106 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
109 {
110 idx = pPage->iMonitoredPrev;
111 Assert(idx != pPage->idx);
112 pPage = &pPool->aPages[idx];
113 }
114 }
115
116 /*
117 * Iterate the list flushing each shadow page.
118 */
119 int rc = VINF_SUCCESS;
120 for (;;)
121 {
122 idx = pPage->iMonitoredNext;
123 Assert(idx != pPage->idx);
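 /* Only pages with a regular pool index (>= PGMPOOL_IDX_FIRST) are flushed; the special indexes below it are skipped by the check that follows. */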
124 if (pPage->idx >= PGMPOOL_IDX_FIRST)
125 {
126 int rc2 = pgmPoolFlushPage(pPool, pPage);
127 AssertRC(rc2);
128 }
129 /* next */
130 if (idx == NIL_PGMPOOL_IDX)
131 break;
132 pPage = &pPool->aPages[idx];
133 }
134 return rc;
135}
136
137
138/**
139 * Reads the guest entry being modified (host pointer in R3, guest physical address otherwise).
140 *
141 * @returns VBox status code.
142 * @param pVM The VM handle.
143 * @param pvDst Destination address.
144 * @param pvSrc Source guest virtual address.
145 * @param GCPhysSrc The source guest physical address.
146 * @param cb Size of data to read.
147 */
148DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc,
149 RTGCPHYS GCPhysSrc, size_t cb)
150{
151#if defined(IN_RING3)
152 NOREF(pVM); NOREF(GCPhysSrc);
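 /* Align the source pointer down to the entry size (cb is 4 or 8, a power of two) so the whole, naturally aligned guest entry is read. */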
153 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
154 return VINF_SUCCESS;
155#else
156 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
157 NOREF(pvSrc);
158 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
159#endif
160}
161
162/**
163 * Process shadow entries before they are changed by the guest.
164 *
165 * For PT entries we will clear them. For PD entries, we'll simply check
166 * for mapping conflicts and set the SyncCR3 FF if found.
167 *
168 * @param pVCpu VMCPU handle
169 * @param pPool The pool.
170 * @param pPage The head page.
171 * @param GCPhysFault The guest physical fault address.
172 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
173 * In R3 this is the host context 'fault' address.
174 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries.
175 */
176void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
177{
178 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
179 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
180 PVM pVM = pPool->CTX_SUFF(pVM);
181 NOREF(pVCpu);
182
183 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
184
185 for (;;)
186 {
187 union
188 {
189 void *pv;
190 PX86PT pPT;
191 PPGMSHWPTPAE pPTPae;
192 PX86PD pPD;
193 PX86PDPAE pPDPae;
194 PX86PDPT pPDPT;
195 PX86PML4 pPML4;
196 } uShw;
197
198 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
199
200 uShw.pv = NULL;
201 switch (pPage->enmKind)
202 {
203 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
204 {
205 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
206 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
207 const unsigned iShw = off / sizeof(X86PTE);
208 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
209 if (uShw.pPT->a[iShw].n.u1Present)
210 {
211 X86PTE GstPte;
212
213 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
214 AssertRC(rc);
215 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
216 pgmPoolTracDerefGCPhysHint(pPool, pPage,
217 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
218 GstPte.u & X86_PTE_PG_MASK,
219 iShw);
220 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
221 }
222 break;
223 }
224
225 /* page/2 sized */
226 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
227 {
228 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
229 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
230 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
231 {
232 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
233 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
234 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
235 {
236 X86PTE GstPte;
237 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
238 AssertRC(rc);
239
240 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PG_MASK));
241 pgmPoolTracDerefGCPhysHint(pPool, pPage,
242 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
243 GstPte.u & X86_PTE_PG_MASK,
244 iShw);
245 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
246 }
247 }
248 break;
249 }
250
251 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
252 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
253 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
254 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
255 {
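 /* A 32-bit guest PD (1024 entries) is shadowed by four PAE PDs of 512 entries each, i.e. 256 guest PDEs per shadow page, and each 4MB guest PDE expands into two 2MB PAE PDEs - hence the /256 and *2 below. */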
256 unsigned iGst = off / sizeof(X86PDE);
257 unsigned iShwPdpt = iGst / 256;
258 unsigned iShw = (iGst % 256) * 2;
259 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
260
261 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
262 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
263 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
264 {
265 for (unsigned i = 0; i < 2; i++)
266 {
267# ifndef IN_RING0
268 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
269 {
270 Assert(pgmMapAreMappingsEnabled(pVM));
271 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
272 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
273 break;
274 }
275# endif /* !IN_RING0 */
276 if (uShw.pPDPae->a[iShw+i].n.u1Present)
277 {
278 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
279 pgmPoolFree(pVM,
280 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
281 pPage->idx,
282 iShw + i);
283 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
284 }
285
286 /* paranoia / a bit assumptive. */
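 /* An unaligned write may spill over into the next guest PDE; if so, handle that PDE's pair of shadow entries as well. */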
287 if ( (off & 3)
288 && (off & 3) + cbWrite > 4)
289 {
290 const unsigned iShw2 = iShw + 2 + i;
291 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
292 {
293# ifndef IN_RING0
294 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
295 {
296 Assert(pgmMapAreMappingsEnabled(pVM));
297 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
298 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
299 break;
300 }
301# endif /* !IN_RING0 */
302 if (uShw.pPDPae->a[iShw2].n.u1Present)
303 {
304 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
305 pgmPoolFree(pVM,
306 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
307 pPage->idx,
308 iShw2);
309 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
310 }
311 }
312 }
313 }
314 }
315 break;
316 }
317
318 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
319 {
320 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
321 const unsigned iShw = off / sizeof(X86PTEPAE);
322 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
323 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
324 {
325 X86PTEPAE GstPte;
326 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
327 AssertRC(rc);
328
329 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
330 pgmPoolTracDerefGCPhysHint(pPool, pPage,
331 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
332 GstPte.u & X86_PTE_PAE_PG_MASK,
333 iShw);
334 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
335 }
336
337 /* paranoia / a bit assumptive. */
338 if ( (off & 7)
339 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
340 {
341 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
342 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
343
344 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
345 {
346 X86PTEPAE GstPte;
347# ifdef IN_RING3
348 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
349# else
350 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
351# endif
352 AssertRC(rc);
353 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
354 pgmPoolTracDerefGCPhysHint(pPool, pPage,
355 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
356 GstPte.u & X86_PTE_PAE_PG_MASK,
357 iShw2);
358 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
359 }
360 }
361 break;
362 }
363
364 case PGMPOOLKIND_32BIT_PD:
365 {
366 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
367 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
368
369 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
370 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
371# ifndef IN_RING0
372 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
373 {
374 Assert(pgmMapAreMappingsEnabled(pVM));
375 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
376 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
377 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
378 break;
379 }
380# endif /* !IN_RING0 */
381# ifndef IN_RING0
382 else
383# endif /* !IN_RING0 */
384 {
385 if (uShw.pPD->a[iShw].n.u1Present)
386 {
387 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
388 pgmPoolFree(pVM,
389 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
390 pPage->idx,
391 iShw);
392 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
393 }
394 }
395 /* paranoia / a bit assumptive. */
396 if ( (off & 3)
397 && (off & 3) + cbWrite > sizeof(X86PTE))
398 {
399 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
400 if ( iShw2 != iShw
401 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
402 {
403# ifndef IN_RING0
404 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
405 {
406 Assert(pgmMapAreMappingsEnabled(pVM));
407 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
408 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
409 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
410 break;
411 }
412# endif /* !IN_RING0 */
413 if (uShw.pPD->a[iShw2].n.u1Present)
414 {
415 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
416 pgmPoolFree(pVM,
417 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
418 pPage->idx,
419 iShw2);
420 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
421 }
422 }
423 }
424#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
425 if ( uShw.pPD->a[iShw].n.u1Present
426 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
427 {
428 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
429# ifdef IN_RC /* TLB load - we're pushing things a bit... */
430 ASMProbeReadByte(pvAddress);
431# endif
432 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
433 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
434 }
435#endif
436 break;
437 }
438
439 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
440 {
441 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
442 const unsigned iShw = off / sizeof(X86PDEPAE);
443 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
444#ifndef IN_RING0
445 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
446 {
447 Assert(pgmMapAreMappingsEnabled(pVM));
448 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
449 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
450 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
451 break;
452 }
453#endif /* !IN_RING0 */
454 /*
455 * Causes trouble when the guest uses a PDE to refer to the whole page table level
456 * structure. (Invalidate here; faults later on when it tries to change the page
457 * table entries -> recheck; probably only applies to the RC case.)
458 */
459# ifndef IN_RING0
460 else
461# endif /* !IN_RING0 */
462 {
463 if (uShw.pPDPae->a[iShw].n.u1Present)
464 {
465 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
466 pgmPoolFree(pVM,
467 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
468 pPage->idx,
469 iShw);
470 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
471 }
472 }
473 /* paranoia / a bit assumptive. */
474 if ( (off & 7)
475 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
476 {
477 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
478 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
479
480#ifndef IN_RING0
481 if ( iShw2 != iShw
482 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
483 {
484 Assert(pgmMapAreMappingsEnabled(pVM));
485 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
486 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
487 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
488 break;
489 }
490#endif /* !IN_RING0 */
491# ifndef IN_RING0
492 else
493# endif /* !IN_RING0 */
494 if (uShw.pPDPae->a[iShw2].n.u1Present)
495 {
496 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
497 pgmPoolFree(pVM,
498 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
499 pPage->idx,
500 iShw2);
501 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
502 }
503 }
504 break;
505 }
506
507 case PGMPOOLKIND_PAE_PDPT:
508 {
509 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
510 /*
511 * Hopefully this doesn't happen very often:
512 * - touching unused parts of the page
513 * - messing with the bits of pd pointers without changing the physical address
514 */
515 /* PDPT roots are not page aligned; 32 byte only! */
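 /* Hence the offset is computed from the PDPT's own GCPhys rather than from the page offset in 'off'. */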
516 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
517
518 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
519 const unsigned iShw = offPdpt / sizeof(X86PDPE);
520 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
521 {
522# ifndef IN_RING0
523 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
524 {
525 Assert(pgmMapAreMappingsEnabled(pVM));
526 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
527 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
528 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
529 break;
530 }
531# endif /* !IN_RING0 */
532# ifndef IN_RING0
533 else
534# endif /* !IN_RING0 */
535 if (uShw.pPDPT->a[iShw].n.u1Present)
536 {
537 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
538 pgmPoolFree(pVM,
539 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
540 pPage->idx,
541 iShw);
542 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
543 }
544
545 /* paranoia / a bit assumptive. */
546 if ( (offPdpt & 7)
547 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
548 {
549 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
550 if ( iShw2 != iShw
551 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
552 {
553# ifndef IN_RING0
554 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
555 {
556 Assert(pgmMapAreMappingsEnabled(pVM));
557 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
558 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
559 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
560 break;
561 }
562# endif /* !IN_RING0 */
563# ifndef IN_RING0
564 else
565# endif /* !IN_RING0 */
566 if (uShw.pPDPT->a[iShw2].n.u1Present)
567 {
568 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
569 pgmPoolFree(pVM,
570 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
571 pPage->idx,
572 iShw2);
573 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
574 }
575 }
576 }
577 }
578 break;
579 }
580
581#ifndef IN_RC
582 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
583 {
584 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
585 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
586 const unsigned iShw = off / sizeof(X86PDEPAE);
587 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
588 if (uShw.pPDPae->a[iShw].n.u1Present)
589 {
590 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
591 pgmPoolFree(pVM,
592 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
593 pPage->idx,
594 iShw);
595 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
596 }
597 /* paranoia / a bit assumptive. */
598 if ( (off & 7)
599 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
600 {
601 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
602 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
603
604 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
605 if (uShw.pPDPae->a[iShw2].n.u1Present)
606 {
607 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
608 pgmPoolFree(pVM,
609 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
610 pPage->idx,
611 iShw2);
612 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
613 }
614 }
615 break;
616 }
617
618 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
619 {
620 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
621 /*
622 * Hopefully this doesn't happen very often:
623 * - messing with the bits of pd pointers without changing the physical address
624 */
625 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
626 const unsigned iShw = off / sizeof(X86PDPE);
627 if (uShw.pPDPT->a[iShw].n.u1Present)
628 {
629 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
630 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
631 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
632 }
633 /* paranoia / a bit assumptive. */
634 if ( (off & 7)
635 && (off & 7) + cbWrite > sizeof(X86PDPE))
636 {
637 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
638 if (uShw.pPDPT->a[iShw2].n.u1Present)
639 {
640 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
641 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
642 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
643 }
644 }
645 break;
646 }
647
648 case PGMPOOLKIND_64BIT_PML4:
649 {
650 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
651 /*
652 * Hopefully this doesn't happen very often:
653 * - messing with the bits of pd pointers without changing the physical address
654 */
655 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
656 const unsigned iShw = off / sizeof(X86PDPE);
657 if (uShw.pPML4->a[iShw].n.u1Present)
658 {
659 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
660 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
661 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
662 }
663 /* paranoia / a bit assumptive. */
664 if ( (off & 7)
665 && (off & 7) + cbWrite > sizeof(X86PDPE))
666 {
667 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
668 if (uShw.pPML4->a[iShw2].n.u1Present)
669 {
670 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
671 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
672 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
673 }
674 }
675 break;
676 }
677#endif /* !IN_RC */
678
679 default:
680 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
681 }
682 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
683
684 /* next */
685 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
686 return;
687 pPage = &pPool->aPages[pPage->iMonitoredNext];
688 }
689}
690
691# ifndef IN_RING3
692/**
693 * Checks if an access could be a fork operation in progress.
694 *
695 * Meaning that the guest is setting up the parent process for Copy-On-Write.
696 *
697 * @returns true if it's likely that we're forking, otherwise false.
698 * @param pPool The pool.
699 * @param pDis The disassembled instruction.
700 * @param offFault The access offset.
701 */
702DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
703{
704 /*
705 * i386 linux is using btr to clear X86_PTE_RW.
706 * The functions involved are (2.6.16 source inspection):
707 * clear_bit
708 * ptep_set_wrprotect
709 * copy_one_pte
710 * copy_pte_range
711 * copy_pmd_range
712 * copy_pud_range
713 * copy_page_range
714 * dup_mmap
715 * dup_mm
716 * copy_mm
717 * copy_process
718 * do_fork
719 */
720 if ( pDis->pCurInstr->opcode == OP_BTR
721 && !(offFault & 4)
722 /** @todo Validate that the bit index is X86_PTE_RW. */
723 )
724 {
725 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
726 return true;
727 }
728 return false;
729}
730
731
732/**
733 * Determine whether the page is likely to have been reused.
734 *
735 * @returns true if we consider the page as being reused for a different purpose.
736 * @returns false if we consider it to still be a paging page.
737 * @param pVM The VM handle.
738 * @param pVCpu VMCPU Handle.
739 * @param pRegFrame Trap register frame.
740 * @param pDis The disassembly info for the faulting instruction.
741 * @param pvFault The fault address.
742 *
743 * @remark The REP prefix check is left to the caller because of STOSD/W.
744 */
745DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
746{
747#ifndef IN_RC
748 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
749 if ( HWACCMHasPendingIrq(pVM)
750 && (pRegFrame->rsp - pvFault) < 32)
751 {
752 /* Fault caused by stack writes while trying to inject an interrupt event. */
753 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
754 return true;
755 }
756#else
757 NOREF(pVM); NOREF(pvFault);
758#endif
759
760 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
761
762 /* Non-supervisor mode write means it's used for something else. */
763 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
764 return true;
765
766 switch (pDis->pCurInstr->opcode)
767 {
768 /* call implies the actual push of the return address faulted */
769 case OP_CALL:
770 Log4(("pgmPoolMonitorIsReused: CALL\n"));
771 return true;
772 case OP_PUSH:
773 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
774 return true;
775 case OP_PUSHF:
776 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
777 return true;
778 case OP_PUSHA:
779 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
780 return true;
781 case OP_FXSAVE:
782 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
783 return true;
784 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
785 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
786 return true;
787 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
788 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
789 return true;
790 case OP_MOVSWD:
791 case OP_STOSWD:
792 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
793 && pRegFrame->rcx >= 0x40
794 )
795 {
796 Assert(pDis->mode == CPUMODE_64BIT);
797
798 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
799 return true;
800 }
801 return false;
802 }
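 /* A write addressed via ESP strongly suggests the page is now being used as a stack rather than as a page table. */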
803 if ( ( (pDis->param1.flags & USE_REG_GEN32)
804 || (pDis->param1.flags & USE_REG_GEN64))
805 && (pDis->param1.base.reg_gen == USE_REG_ESP))
806 {
807 Log4(("pgmPoolMonitorIsReused: ESP\n"));
808 return true;
809 }
810
811 return false;
812}
813
814/**
815 * Flushes the page being accessed.
816 *
817 * @returns VBox status code suitable for scheduling.
818 * @param pVM The VM handle.
819 * @param pVCpu The VMCPU handle.
820 * @param pPool The pool.
821 * @param pPage The pool page (head).
822 * @param pDis The disassembly of the write instruction.
823 * @param pRegFrame The trap register frame.
824 * @param GCPhysFault The fault address as guest physical address.
825 * @param pvFault The fault address.
826 * @todo VBOXSTRICTRC
827 */
828static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
829 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
830{
831 NOREF(GCPhysFault);
832
833 /*
834 * First, do the flushing.
835 */
836 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
837
838 /*
839 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
840 * Must do this in raw mode (!); XP boot will fail otherwise.
841 */
842 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
843 if (RT_SUCCESS(rc2))
844 AssertMsg(rc2 == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
845 else if (rc2 == VERR_EM_INTERPRETER)
846 {
847#ifdef IN_RC
848 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
849 {
850 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
851 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
852 rc = VINF_SUCCESS;
853 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
854 }
855 else
856#endif
857 {
858 rc = VINF_EM_RAW_EMULATE_INSTR;
859 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
860 }
861 }
862 else
863 rc = VBOXSTRICTRC_VAL(rc2);
864
865 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
866 return rc;
867}
868
869/**
870 * Handles the STOSD write accesses.
871 *
872 * @returns VBox status code suitable for scheduling.
873 * @param pVM The VM handle.
874 * @param pPool The pool.
875 * @param pPage The pool page (head).
876 * @param pDis The disassembly of the write instruction.
877 * @param pRegFrame The trap register frame.
878 * @param GCPhysFault The fault address as guest physical address.
879 * @param pvFault The fault address.
880 */
881DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
882 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
883{
884 unsigned uIncrement = pDis->param1.size;
885 NOREF(pVM);
886
887 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
888 Assert(pRegFrame->rcx <= 0x20);
889
890#ifdef VBOX_STRICT
891 if (pDis->opmode == CPUMODE_32BIT)
892 Assert(uIncrement == 4);
893 else
894 Assert(uIncrement == 8);
895#endif
896
897 Log3(("pgmPoolAccessHandlerSTOSD\n"));
898
899 /*
900 * Increment the modification counter and insert it into the list
901 * of modified pages the first time.
902 */
903 if (!pPage->cModifications++)
904 pgmPoolMonitorModifiedInsert(pPool, pPage);
905
906 /*
907 * Execute REP STOSD.
908 *
909 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
910 * write situation, meaning that it's safe to write here.
911 */
912 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
913 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
914 while (pRegFrame->rcx)
915 {
916#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
917 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
918 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
919 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
920#else
921 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
922#endif
923#ifdef IN_RC
924 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
925#else
926 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
927#endif
928 pu32 += uIncrement;
929 GCPhysFault += uIncrement;
930 pRegFrame->rdi += uIncrement;
931 pRegFrame->rcx--;
932 }
933 pRegFrame->rip += pDis->opsize;
934
935 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
936 return VINF_SUCCESS;
937}
938
939
940/**
941 * Handles the simple write accesses.
942 *
943 * @returns VBox status code suitable for scheduling.
944 * @param pVM The VM handle.
945 * @param pVCpu The VMCPU handle.
946 * @param pPool The pool.
947 * @param pPage The pool page (head).
948 * @param pDis The disassembly of the write instruction.
949 * @param pRegFrame The trap register frame.
950 * @param GCPhysFault The fault address as guest physical address.
951 * @param pvFault The fault address.
952 * @param pfReused Reused state (in/out)
953 */
954DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
955 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
956{
957 Log3(("pgmPoolAccessHandlerSimple\n"));
958 NOREF(pfReused); /* initialized by caller */
959
960 /*
961 * Increment the modification counter and insert it into the list
962 * of modified pages the first time.
963 */
964 if (!pPage->cModifications++)
965 pgmPoolMonitorModifiedInsert(pPool, pPage);
966
967 /*
968 * Process the affected shadow entries in the monitored chain. ASSUMES that pvFault is readable.
969 */
970#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
971 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
972 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
973 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
974#else
975 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
976#endif
977
978 /*
979 * Interpret the instruction.
980 */
981 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
982 if (RT_SUCCESS(rc))
983 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
984 else if (rc == VERR_EM_INTERPRETER)
985 {
986 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
987 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
988 rc = VINF_EM_RAW_EMULATE_INSTR;
989 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
990 }
991
992#if 0 /* experimental code */
993 if (rc == VINF_SUCCESS)
994 {
995 switch (pPage->enmKind)
996 {
997 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
998 {
999 X86PTEPAE GstPte;
1000 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1001 AssertRC(rc);
1002
1003 /* Check the new value written by the guest. If present and with a bogus physical address, then
1004 * it's fairly safe to assume the guest is reusing the PT.
1005 */
1006 if (GstPte.n.u1Present)
1007 {
1008 RTHCPHYS HCPhys = -1;
1009 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1010 if (rc != VINF_SUCCESS)
1011 {
1012 *pfReused = true;
1013 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1014 }
1015 }
1016 break;
1017 }
1018 }
1019 }
1020#endif
1021
1022 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1023 return VBOXSTRICTRC_VAL(rc);
1024}
1025
1026/**
1027 * \#PF Handler callback for PT write accesses.
1028 *
1029 * @returns VBox status code (appropriate for GC return).
1030 * @param pVM The VM handle.
1031 * @param uErrorCode CPU Error code.
1032 * @param pRegFrame Trap register frame.
1033 * NULL on DMA and other non CPU access.
1034 * @param pvFault The fault address (cr2).
1035 * @param GCPhysFault The GC physical address corresponding to pvFault.
1036 * @param pvUser User argument.
1037 */
1038DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1039{
1040 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1041 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1042 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1043 PVMCPU pVCpu = VMMGetCpu(pVM);
1044 unsigned cMaxModifications;
1045 bool fForcedFlush = false;
1046 NOREF(uErrorCode);
1047
1048 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1049
1050 pgmLock(pVM);
1051 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1052 {
1053 /* Pool page changed while we were waiting for the lock; ignore. */
1054 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1055 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1056 pgmUnlock(pVM);
1057 return VINF_SUCCESS;
1058 }
1059#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1060 if (pPage->fDirty)
1061 {
1062 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1063 pgmUnlock(pVM);
1064 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1065 }
1066#endif
1067
1068#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1069 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1070 {
1071 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1072 void *pvGst;
1073 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1074 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1075 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1076 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1077 }
1078#endif
1079
1080 /*
1081 * Disassemble the faulting instruction.
1082 */
1083 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1084 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1085 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1086 {
1087 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1088 pgmUnlock(pVM);
1089 return rc;
1090 }
1091
1092 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1093
1094 /*
1095 * We should ALWAYS have the list head as user parameter. This
1096 * is because we use that page to record the changes.
1097 */
1098 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1099
1100#ifdef IN_RING0
1101 /* Maximum nr of modifications depends on the page type. */
1102 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1103 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1104 cMaxModifications = 4;
1105 else
1106 cMaxModifications = 24;
1107#else
1108 cMaxModifications = 48;
1109#endif
1110
1111 /*
1112 * Incremental page table updates should weigh more than random ones.
1113 * (Only applies when started from offset 0)
1114 */
1115 pVCpu->pgm.s.cPoolAccessHandler++;
1116 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1117 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1118 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1119 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1120 {
1121 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1122 Assert(pPage->cModifications < 32000);
1123 pPage->cModifications = pPage->cModifications * 2;
1124 pPage->pvLastAccessHandlerFault = pvFault;
1125 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1126 if (pPage->cModifications >= cMaxModifications)
1127 {
1128 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1129 fForcedFlush = true;
1130 }
1131 }
1132
1133 if (pPage->cModifications >= cMaxModifications)
1134 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1135
1136 /*
1137 * Check if it's worth dealing with.
1138 */
1139 bool fReused = false;
1140 bool fNotReusedNotForking = false;
1141 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1142 || pgmPoolIsPageLocked(pPage)
1143 )
1144 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1145 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1146 {
1147 /*
1148 * Simple instructions, no REP prefix.
1149 */
1150 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1151 {
1152 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1153 if (fReused)
1154 goto flushPage;
1155
1156 /* A mov instruction to change the first page table entry will be remembered so we can detect
1157 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1158 */
1159 if ( rc == VINF_SUCCESS
1160 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1161 && pDis->pCurInstr->opcode == OP_MOV
1162 && (pvFault & PAGE_OFFSET_MASK) == 0)
1163 {
1164 pPage->pvLastAccessHandlerFault = pvFault;
1165 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1166 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1167 /* Make sure we don't kick out a page too quickly. */
1168 if (pPage->cModifications > 8)
1169 pPage->cModifications = 2;
1170 }
1171 else
1172 if (pPage->pvLastAccessHandlerFault == pvFault)
1173 {
1174 /* ignore the 2nd write to this page table entry. */
1175 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1176 }
1177 else
1178 {
1179 pPage->pvLastAccessHandlerFault = 0;
1180 pPage->pvLastAccessHandlerRip = 0;
1181 }
1182
1183 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1184 pgmUnlock(pVM);
1185 return rc;
1186 }
1187
1188 /*
1189 * Windows is frequently doing small memset() operations (netio test 4k+).
1190 * We have to deal with these or we'll kill the cache and performance.
1191 */
1192 if ( pDis->pCurInstr->opcode == OP_STOSWD
1193 && !pRegFrame->eflags.Bits.u1DF
1194 && pDis->opmode == pDis->mode
1195 && pDis->addrmode == pDis->mode)
1196 {
1197 bool fValidStosd = false;
1198
1199 if ( pDis->mode == CPUMODE_32BIT
1200 && pDis->prefix == PREFIX_REP
1201 && pRegFrame->ecx <= 0x20
1202 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1203 && !((uintptr_t)pvFault & 3)
1204 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1205 )
1206 {
1207 fValidStosd = true;
1208 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1209 }
1210 else
1211 if ( pDis->mode == CPUMODE_64BIT
1212 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1213 && pRegFrame->rcx <= 0x20
1214 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1215 && !((uintptr_t)pvFault & 7)
1216 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1217 )
1218 {
1219 fValidStosd = true;
1220 }
1221
1222 if (fValidStosd)
1223 {
1224 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1225 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1226 pgmUnlock(pVM);
1227 return rc;
1228 }
1229 }
1230
1231 /* REP prefix, don't bother. */
1232 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1233 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1234 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1235 fNotReusedNotForking = true;
1236 }
1237
1238#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1239 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1240 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1241 */
1242 if ( pPage->cModifications >= cMaxModifications
1243 && !fForcedFlush
1244 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1245 && ( fNotReusedNotForking
1246 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1247 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1248 )
1249 )
1250 {
1251 Assert(!pgmPoolIsPageLocked(pPage));
1252 Assert(pPage->fDirty == false);
1253
1254 /* Flush any monitored duplicates as we will disable write protection. */
1255 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1256 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1257 {
1258 PPGMPOOLPAGE pPageHead = pPage;
1259
1260 /* Find the monitor head. */
1261 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1262 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1263
1264 while (pPageHead)
1265 {
1266 unsigned idxNext = pPageHead->iMonitoredNext;
1267
1268 if (pPageHead != pPage)
1269 {
1270 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1271 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1272 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1273 AssertRC(rc2);
1274 }
1275
1276 if (idxNext == NIL_PGMPOOL_IDX)
1277 break;
1278
1279 pPageHead = &pPool->aPages[idxNext];
1280 }
1281 }
1282
1283 /* The flushing above might fail for locked pages, so double check. */
1284 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1285 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1286 {
1287 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1288
1289 /* Temporarily allow write access to the page table again. */
1290 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1291 if (rc == VINF_SUCCESS)
1292 {
1293 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1294 AssertMsg(rc == VINF_SUCCESS
1295 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1296 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1297 || rc == VERR_PAGE_NOT_PRESENT,
1298 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1299
1300 pPage->pvDirtyFault = pvFault;
1301
1302 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1303 pgmUnlock(pVM);
1304 return rc;
1305 }
1306 }
1307 }
1308#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1309
1310 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1311flushPage:
1312 /*
1313 * Not worth it, so flush it.
1314 *
1315 * If we considered it to be reused, don't go back to ring-3
1316 * to emulate failed instructions since we usually cannot
1317 * interpret them. This may be a bit risky, in which case
1318 * the reuse detection must be fixed.
1319 */
1320 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1321 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1322 && fReused)
1323 {
1324 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1325 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1326 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1327 }
1328 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1329 pgmUnlock(pVM);
1330 return rc;
1331}
1332
1333# endif /* !IN_RING3 */
1334
1335# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1336
1337# if defined(VBOX_STRICT) && !defined(IN_RING3)
1338
1339/**
1340 * Check references to guest physical memory in a PAE / PAE page table.
1341 *
1342 * @param pPool The pool.
1343 * @param pPage The page.
1344 * @param pShwPT The shadow page table (mapping of the page).
1345 * @param pGstPT The guest page table.
1346 */
1347static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1348{
1349 unsigned cErrors = 0;
1350 int LastRc = -1; /* initialized to shut up gcc */
1351 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1352 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1353 PVM pVM = pPool->CTX_SUFF(pVM);
1354
1355#ifdef VBOX_STRICT
1356 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1357 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1358#endif
1359 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1360 {
1361 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1362 {
1363 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1364 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1365 if ( rc != VINF_SUCCESS
1366 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1367 {
1368 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1369 LastPTE = i;
1370 LastRc = rc;
1371 LastHCPhys = HCPhys;
1372 cErrors++;
1373
1374 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1375 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1376 AssertRC(rc);
1377
1378 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1379 {
1380 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1381
1382 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1383 {
1384 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1385
1386 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1387 {
1388 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1389 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1390 {
1391 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1392 }
1393 }
1394
1395 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1396 }
1397 }
1398 }
1399 }
1400 }
1401 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1402}
1403
1404/**
1405 * Check references to guest physical memory in a PAE / 32-bit page table.
1406 *
1407 * @param pPool The pool.
1408 * @param pPage The page.
1409 * @param pShwPT The shadow page table (mapping of the page).
1410 * @param pGstPT The guest page table.
1411 */
1412static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1413{
1414 unsigned cErrors = 0;
1415 int LastRc = -1; /* initialized to shut up gcc */
1416 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1417 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1418 PVM pVM = pPool->CTX_SUFF(pVM);
1419
1420#ifdef VBOX_STRICT
1421 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1422 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1423#endif
1424 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1425 {
1426 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1427 {
1428 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1429 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1430 if ( rc != VINF_SUCCESS
1431 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1432 {
1433 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1434 LastPTE = i;
1435 LastRc = rc;
1436 LastHCPhys = HCPhys;
1437 cErrors++;
1438
1439 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1440 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1441 AssertRC(rc);
1442
1443 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1444 {
1445 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1446
1447 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1448 {
1449 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1450
1451 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1452 {
1453 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1454 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1455 {
1456 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1457 }
1458 }
1459
1460 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1461 }
1462 }
1463 }
1464 }
1465 }
1466 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1467}
1468
1469# endif /* VBOX_STRICT && !IN_RING3 */
1470
1471/**
1472 * Clear references to guest physical memory in a PAE / PAE page table.
1473 *
1474 * @returns nr of changed PTEs
1475 * @param pPool The pool.
1476 * @param pPage The page.
1477 * @param pShwPT The shadow page table (mapping of the page).
1478 * @param pGstPT The guest page table.
1479 * @param pOldGstPT The old cached guest page table.
1480 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1481 * @param pfFlush Flush reused page table (out)
1482 */
1483DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1484 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1485{
1486 unsigned cChanged = 0;
1487
1488#ifdef VBOX_STRICT
1489 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1490 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1491#endif
1492 *pfFlush = false;
1493
1494 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1495 {
1496 /* Check the new value written by the guest. If present and with a bogus physical address, then
1497 * it's fairly safe to assume the guest is reusing the PT.
1498 */
1499 if ( fAllowRemoval
1500 && pGstPT->a[i].n.u1Present)
1501 {
1502 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1503 {
1504 *pfFlush = true;
1505 return ++cChanged;
1506 }
1507 }
1508 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1509 {
1510 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1511 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1512 {
1513#ifdef VBOX_STRICT
1514 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1515 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1516 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1517#endif
1518 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1519 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1520 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1521 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1522
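 /* The shadow PTE may be more restrictive than the guest PTE (e.g. write access removed for monitoring), but it must never be more permissive. */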
1523 if ( uHostAttr == uGuestAttr
1524 && fHostRW <= fGuestRW)
1525 continue;
1526 }
1527 cChanged++;
1528 /* Something was changed, so flush it. */
1529 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1530 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1531 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1532 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1533 }
1534 }
1535 return cChanged;
1536}
1537
1538/**
1539 * Clear references to guest physical memory in a PAE / 32-bit page table.
1540 *
1541 * @returns nr of changed PTEs
1542 * @param pPool The pool.
1543 * @param pPage The page.
1544 * @param pShwPT The shadow page table (mapping of the page).
1545 * @param pGstPT The guest page table.
1546 * @param pOldGstPT The old cached guest page table.
1547 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1548 * @param pfFlush Flush reused page table (out)
1549 */
1550DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1551 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1552{
1553 unsigned cChanged = 0;
1554
1555#ifdef VBOX_STRICT
1556 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1557 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1558#endif
1559 *pfFlush = false;
1560
1561 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1562 {
1563 /* Check the new value written by the guest. If present and with a bogus physical address, then
1564 * it's fairly safe to assume the guest is reusing the PT.
1565 */
1566 if ( fAllowRemoval
1567 && pGstPT->a[i].n.u1Present)
1568 {
1569 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1570 {
1571 *pfFlush = true;
1572 return ++cChanged;
1573 }
1574 }
1575 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1576 {
1577 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1578 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1579 {
1580#ifdef VBOX_STRICT
1581 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1582 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1583 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1584#endif
1585 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1586 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1587 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1588 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1589
1590 if ( uHostAttr == uGuestAttr
1591 && fHostRW <= fGuestRW)
1592 continue;
1593 }
1594 cChanged++;
1595 /* Something was changed, so flush it. */
1596 Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1597 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1598 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1599 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1600 }
1601 }
1602 return cChanged;
1603}
1604
1605/**
1606 * Flushes a dirty page.
1607 *
1608 * @param pVM The VM handle.
1609 * @param pPool The pool.
1610 * @param idxSlot Dirty array slot index.
1611 * @param fAllowRemoval Allow a reused page table to be removed.
1612 */
1613static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1614{
1615 PPGMPOOLPAGE pPage;
1616 unsigned idxPage;
1617
1618 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1619 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1620 return;
1621
1622 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1623 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1624 pPage = &pPool->aPages[idxPage];
1625 Assert(pPage->idx == idxPage);
1626 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1627
1628 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1629 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1630
1631#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1632 PVMCPU pVCpu = VMMGetCpu(pVM);
1633 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1634#endif
1635
1636 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1637 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1638 Assert(rc == VINF_SUCCESS);
1639 pPage->fDirty = false;
1640
1641#ifdef VBOX_STRICT
1642 uint64_t fFlags = 0;
1643 RTHCPHYS HCPhys;
1644 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1645 AssertMsg( ( rc == VINF_SUCCESS
1646 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1647 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1648 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1649 || rc == VERR_PAGE_NOT_PRESENT,
1650 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1651#endif
1652
1653 /* Flush those PTEs that have changed. */
1654 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1655 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1656 void *pvGst;
1657 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1658 bool fFlush;
1659 unsigned cChanges;
1660
1661 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1662 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1663 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1664 else
1665 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1666 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1667
1668 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1669 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1670 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1671 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1672
1673 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1674 Assert(pPage->cModifications);
1675 if (cChanges < 4)
1676 pPage->cModifications = 1; /* must use > 0 here */
1677 else
1678 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1679
1680 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
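 /* If the array was completely full, the slot we have just freed becomes the next free slot. */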
1681 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1682 pPool->idxFreeDirtyPage = idxSlot;
1683
1684 pPool->cDirtyPages--;
1685 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1686 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1687 if (fFlush)
1688 {
1689 Assert(fAllowRemoval);
1690 Log(("Flush reused page table!\n"));
1691 pgmPoolFlushPage(pPool, pPage);
1692 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1693 }
1694 else
1695 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1696
1697#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1698 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1699#endif
1700}
1701
1702# ifndef IN_RING3
1703/**
1704 * Adds a new dirty page.
1705 *
1706 * @param pVM The VM handle.
1707 * @param pPool The pool.
1708 * @param pPage The page.
1709 */
1710void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1711{
1712 unsigned idxFree;
1713
1714 PGM_LOCK_ASSERT_OWNER(pVM);
1715 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1716 Assert(!pPage->fDirty);
1717
1718 idxFree = pPool->idxFreeDirtyPage;
1719 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1720 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1721
1722 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1723 {
1724 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1725 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1726 }
1727 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1728 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1729
1730 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1731
1732 /*
1733 * Make a copy of the guest page table as we require valid GCPhys addresses
1734 * when removing references to physical pages.
1735 * (The HCPhys linear lookup is *extremely* expensive!)
1736 */
1737 void *pvGst;
1738 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
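 /* A PAE shadow of a PAE guest PT needs the full 4 KB copy; a PAE shadow of a 32-bit guest PT only covers 512 guest entries, so half a page suffices. */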
1739 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1740# ifdef VBOX_STRICT
1741 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1742 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1743 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1744 else
1745 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1746 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1747# endif
1748 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1749
1750 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1751 pPage->fDirty = true;
1752 pPage->idxDirty = idxFree;
1753 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1754 pPool->cDirtyPages++;
1755
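 /* Advance the free slot hint; if that slot is still occupied and the array is not full, search round-robin for one that is actually free. */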
1756 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1757 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1758 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1759 {
1760 unsigned i;
1761 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1762 {
1763 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1764 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1765 {
1766 pPool->idxFreeDirtyPage = idxFree;
1767 break;
1768 }
1769 }
1770 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1771 }
1772
1773 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1774 return;
1775}
1776# endif /* !IN_RING3 */
1777
1778/**
1779 * Checks if the specified page is dirty (not write monitored).
1780 *
1781 * @returns true if dirty, false if not.
1782 * @param pVM The VM handle.
1783 * @param GCPhys Guest physical address.
1784 */
1785bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1786{
1787 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1788 PGM_LOCK_ASSERT_OWNER(pVM);
1789 if (!pPool->cDirtyPages)
1790 return false;
1791
1792 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1793
1794 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1795 {
1796 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1797 {
1798 PPGMPOOLPAGE pPage;
1799 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1800
1801 pPage = &pPool->aPages[idxPage];
1802 if (pPage->GCPhys == GCPhys)
1803 return true;
1804 }
1805 }
1806 return false;
1807}
1808
1809/**
1810 * Reset all dirty pages by reinstating page monitoring.
1811 *
1812 * @param pVM The VM handle.
1813 */
1814void pgmPoolResetDirtyPages(PVM pVM)
1815{
1816 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1817 PGM_LOCK_ASSERT_OWNER(pVM);
1818 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1819
1820 if (!pPool->cDirtyPages)
1821 return;
1822
1823 Log(("pgmPoolResetDirtyPages\n"));
1824 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1825 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1826
1827 pPool->idxFreeDirtyPage = 0;
1828 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1829 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1830 {
1831 unsigned i;
1832 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1833 {
1834 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1835 {
1836 pPool->idxFreeDirtyPage = i;
1837 break;
1838 }
1839 }
1840 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1841 }
1842
1843 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1844 return;
1845}
1846
1847/**
1848 * Invalidate the PT entry for the specified page
1849 *
1850 * @param pVM The VM handle.
1851 * @param GCPtrPage Guest page to invalidate
1852 */
1853void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1854{
1855 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1856 PGM_LOCK_ASSERT_OWNER(pVM);
1857 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1858
1859 if (!pPool->cDirtyPages)
1860 return;
1861
1862 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
1863 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1864 {
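 /* Note: the loop body is currently empty; no per-entry invalidation is performed in this revision. */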
1865 }
1866}
1867
1868/**
1869 * Flushes the dirty page tracking entry of the specified guest page table.
1870 *
1871 * @param pVM The VM handle.
1872 * @param GCPhysPT Guest physical address of the page table.
1873 */
1874void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1875{
1876 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1877 PGM_LOCK_ASSERT_OWNER(pVM);
1878 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1879 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1880
1881 if (!pPool->cDirtyPages)
1882 return;
1883
1884 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1885
1886 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1887 {
1888 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1889 {
1890 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1891
1892 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1893 if (pPage->GCPhys == GCPhysPT)
1894 {
1895 idxDirtyPage = i;
1896 break;
1897 }
1898 }
1899 }
1900
1901 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1902 {
1903 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1904 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1905 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1906 {
1907 unsigned i;
1908 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1909 {
1910 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1911 {
1912 pPool->idxFreeDirtyPage = i;
1913 break;
1914 }
1915 }
1916 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1917 }
1918 }
1919}
1920
1921# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1922
1923/**
1924 * Inserts a page into the GCPhys hash table.
1925 *
1926 * @param pPool The pool.
1927 * @param pPage The page.
1928 */
1929DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1930{
1931 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1932 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1933 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1934 pPage->iNext = pPool->aiHash[iHash];
1935 pPool->aiHash[iHash] = pPage->idx;
1936}
1937
1938
1939/**
1940 * Removes a page from the GCPhys hash table.
1941 *
1942 * @param pPool The pool.
1943 * @param pPage The page.
1944 */
1945DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1946{
1947 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1948 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1949 if (pPool->aiHash[iHash] == pPage->idx)
1950 pPool->aiHash[iHash] = pPage->iNext;
1951 else
1952 {
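 /* The page is not the head of the hash chain: walk the chain to find its predecessor and unlink it. */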
1953 uint16_t iPrev = pPool->aiHash[iHash];
1954 for (;;)
1955 {
1956 const int16_t i = pPool->aPages[iPrev].iNext;
1957 if (i == pPage->idx)
1958 {
1959 pPool->aPages[iPrev].iNext = pPage->iNext;
1960 break;
1961 }
1962 if (i == NIL_PGMPOOL_IDX)
1963 {
1964 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1965 break;
1966 }
1967 iPrev = i;
1968 }
1969 }
1970 pPage->iNext = NIL_PGMPOOL_IDX;
1971}
1972
1973
1974/**
1975 * Frees up one cache page.
1976 *
1977 * @returns VBox status code.
1978 * @retval VINF_SUCCESS on success.
1979 * @param pPool The pool.
1980 * @param iUser The user index.
1981 */
1982static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1983{
1984#ifndef IN_RC
1985 const PVM pVM = pPool->CTX_SUFF(pVM);
1986#endif
1987 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
1988 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1989
1990 /*
1991 * Select one page from the tail of the age list.
1992 */
1993 PPGMPOOLPAGE pPage;
1994 for (unsigned iLoop = 0; ; iLoop++)
1995 {
1996 uint16_t iToFree = pPool->iAgeTail;
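 /* Never pick the caller's own user page as the eviction victim; take the next oldest page instead. */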
1997 if (iToFree == iUser)
1998 iToFree = pPool->aPages[iToFree].iAgePrev;
1999/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2000 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2001 {
2002 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2003 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2004 {
2005 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2006 continue;
2007 iToFree = i;
2008 break;
2009 }
2010 }
2011*/
2012 Assert(iToFree != iUser);
2013 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2014 pPage = &pPool->aPages[iToFree];
2015
2016 /*
2017 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2018 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2019 */
2020 if (!pgmPoolIsPageLocked(pPage))
2021 break;
2022 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2023 pgmPoolCacheUsed(pPool, pPage);
2024 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2025 }
2026
2027 /*
2028 * Found a usable page, flush it and return.
2029 */
2030 int rc = pgmPoolFlushPage(pPool, pPage);
2031 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2032 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2033 if (rc == VINF_SUCCESS)
2034 PGM_INVL_ALL_VCPU_TLBS(pVM);
2035 return rc;
2036}
2037
2038
2039/**
2040 * Checks if a kind mismatch is really a page being reused
2041 * or if it's just normal remappings.
2042 *
2043 * @returns true if reused and the cached page (enmKind1) should be flushed
2044 * @returns false if not reused.
2045 * @param enmKind1 The kind of the cached page.
2046 * @param enmKind2 The kind of the requested page.
2047 */
2048static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2049{
2050 switch (enmKind1)
2051 {
2052 /*
2053 * Never reuse them. There is no remapping in non-paging mode.
2054 */
2055 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2056 case PGMPOOLKIND_32BIT_PD_PHYS:
2057 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2058 case PGMPOOLKIND_PAE_PD_PHYS:
2059 case PGMPOOLKIND_PAE_PDPT_PHYS:
2060 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2061 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2062 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2063 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2064 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2065 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2066 return false;
2067
2068 /*
2069 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2070 */
2071 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2072 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2073 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2074 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2075 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2076 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2077 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2078 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2079 case PGMPOOLKIND_32BIT_PD:
2080 case PGMPOOLKIND_PAE_PDPT:
2081 switch (enmKind2)
2082 {
2083 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2084 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2085 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2086 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2087 case PGMPOOLKIND_64BIT_PML4:
2088 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2089 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2090 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2091 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2092 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2093 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2094 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2095 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2096 return true;
2097 default:
2098 return false;
2099 }
2100
2101 /*
2102 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2103 */
2104 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2105 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2106 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2107 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2108 case PGMPOOLKIND_64BIT_PML4:
2109 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2110 switch (enmKind2)
2111 {
2112 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2113 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2114 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2115 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2116 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2117 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2118 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2119 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2120 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2121 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2122 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2123 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2124 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2125 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2126 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2127 return true;
2128 default:
2129 return false;
2130 }
2131
2132 /*
2133 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2134 */
2135 case PGMPOOLKIND_ROOT_NESTED:
2136 return false;
2137
2138 default:
2139 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2140 }
2141}
2142
2143
2144/**
2145 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2146 *
2147 * @returns VBox status code.
2148 * @retval VINF_PGM_CACHED_PAGE on success.
2149 * @retval VERR_FILE_NOT_FOUND if not found.
2150 * @param pPool The pool.
2151 * @param GCPhys The GC physical address of the page we're gonna shadow.
2152 * @param enmKind The kind of mapping.
2153 * @param enmAccess Access type for the mapping (only relevant for big pages)
2154 * @param iUser The shadow page pool index of the user table.
2155 * @param iUserTable The index into the user table (shadowed).
2156 * @param ppPage Where to store the pointer to the page.
2157 */
2158static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2159{
2160 /*
2161 * Look up the GCPhys in the hash.
2162 */
2163 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2164 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2165 if (i != NIL_PGMPOOL_IDX)
2166 {
2167 do
2168 {
2169 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2170 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2171 if (pPage->GCPhys == GCPhys)
2172 {
2173 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2174 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2175 {
2176 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2177 * doesn't flush it in case there are no more free use records.
2178 */
2179 pgmPoolCacheUsed(pPool, pPage);
2180
2181 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2182 if (RT_SUCCESS(rc))
2183 {
2184 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2185 *ppPage = pPage;
2186 if (pPage->cModifications)
2187 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2188 STAM_COUNTER_INC(&pPool->StatCacheHits);
2189 return VINF_PGM_CACHED_PAGE;
2190 }
2191 return rc;
2192 }
2193
2194 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2195 {
2196 /*
2197 * The kind is different. In some cases we should now flush the page
2198 * as it has been reused, but in most cases this is normal remapping
2199 * of PDs as PT or big pages using the GCPhys field in a slightly
2200 * different way than the other kinds.
2201 */
2202 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2203 {
2204 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2205 pgmPoolFlushPage(pPool, pPage);
2206 break;
2207 }
2208 }
2209 }
2210
2211 /* next */
2212 i = pPage->iNext;
2213 } while (i != NIL_PGMPOOL_IDX);
2214 }
2215
2216 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2217 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2218 return VERR_FILE_NOT_FOUND;
2219}
2220
2221
2222/**
2223 * Inserts a page into the cache.
2224 *
2225 * @param pPool The pool.
2226 * @param pPage The cached page.
2227 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2228 */
2229static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2230{
2231 /*
2232 * Insert into the GCPhys hash if the page is fit for that.
2233 */
2234 Assert(!pPage->fCached);
2235 if (fCanBeCached)
2236 {
2237 pPage->fCached = true;
2238 pgmPoolHashInsert(pPool, pPage);
2239 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2240 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2241 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2242 }
2243 else
2244 {
2245 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2246 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2247 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2248 }
2249
2250 /*
2251 * Insert at the head of the age list.
2252 */
2253 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2254 pPage->iAgeNext = pPool->iAgeHead;
2255 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2256 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2257 else
2258 pPool->iAgeTail = pPage->idx;
2259 pPool->iAgeHead = pPage->idx;
2260}
2261
2262
2263/**
2264 * Flushes a cached page.
2265 *
2266 * @param pPool The pool.
2267 * @param pPage The cached page.
2268 */
2269static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2270{
2271 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2272
2273 /*
2274 * Remove the page from the hash.
2275 */
2276 if (pPage->fCached)
2277 {
2278 pPage->fCached = false;
2279 pgmPoolHashRemove(pPool, pPage);
2280 }
2281 else
2282 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2283
2284 /*
2285 * Remove it from the age list.
2286 */
2287 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2288 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2289 else
2290 pPool->iAgeTail = pPage->iAgePrev;
2291 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2292 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2293 else
2294 pPool->iAgeHead = pPage->iAgeNext;
2295 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2296 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2297}
2298
2299
2300/**
2301 * Looks for pages sharing the monitor.
2302 *
2303 * @returns Pointer to the head page.
2304 * @returns NULL if not found.
2305 * @param pPool The Pool
2306 * @param pNewPage The page which is going to be monitored.
2307 */
2308static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2309{
2310 /*
2311 * Look up the GCPhys in the hash.
2312 */
2313 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2314 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2315 if (i == NIL_PGMPOOL_IDX)
2316 return NULL;
2317 do
2318 {
2319 PPGMPOOLPAGE pPage = &pPool->aPages[i];
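 /* pPage->GCPhys may include a sub-page offset for some kinds, so use a range check to match any page backed by the same guest physical page. */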
2320 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2321 && pPage != pNewPage)
2322 {
2323 switch (pPage->enmKind)
2324 {
2325 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2326 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2327 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2328 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2329 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2330 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2331 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2332 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2333 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2334 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2335 case PGMPOOLKIND_64BIT_PML4:
2336 case PGMPOOLKIND_32BIT_PD:
2337 case PGMPOOLKIND_PAE_PDPT:
2338 {
2339 /* find the head */
2340 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2341 {
2342 Assert(pPage->iMonitoredPrev != pPage->idx);
2343 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2344 }
2345 return pPage;
2346 }
2347
2348 /* ignore, no monitoring. */
2349 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2350 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2351 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2352 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2353 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2354 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2355 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2356 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2357 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2358 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2359 case PGMPOOLKIND_ROOT_NESTED:
2360 case PGMPOOLKIND_PAE_PD_PHYS:
2361 case PGMPOOLKIND_PAE_PDPT_PHYS:
2362 case PGMPOOLKIND_32BIT_PD_PHYS:
2363 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2364 break;
2365 default:
2366 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2367 }
2368 }
2369
2370 /* next */
2371 i = pPage->iNext;
2372 } while (i != NIL_PGMPOOL_IDX);
2373 return NULL;
2374}
2375
2376
2377/**
2378 * Enables write monitoring of a guest page.
2379 *
2380 * @returns VBox status code.
2381 * @retval VINF_SUCCESS on success.
2382 * @param pPool The pool.
2383 * @param pPage The cached page.
2384 */
2385static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2386{
2387 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2388
2389 /*
2390 * Filter out the relevant kinds.
2391 */
2392 switch (pPage->enmKind)
2393 {
2394 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2395 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2396 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2397 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2398 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2399 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2400 case PGMPOOLKIND_64BIT_PML4:
2401 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2402 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2403 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2404 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2405 case PGMPOOLKIND_32BIT_PD:
2406 case PGMPOOLKIND_PAE_PDPT:
2407 break;
2408
2409 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2410 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2411 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2412 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2413 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2414 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2415 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2416 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2417 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2418 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2419 case PGMPOOLKIND_ROOT_NESTED:
2420 /* Nothing to monitor here. */
2421 return VINF_SUCCESS;
2422
2423 case PGMPOOLKIND_32BIT_PD_PHYS:
2424 case PGMPOOLKIND_PAE_PDPT_PHYS:
2425 case PGMPOOLKIND_PAE_PD_PHYS:
2426 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2427 /* Nothing to monitor here. */
2428 return VINF_SUCCESS;
2429 default:
2430 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2431 }
2432
2433 /*
2434 * Install handler.
2435 */
2436 int rc;
2437 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2438 if (pPageHead)
2439 {
2440 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2441 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2442
2443#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2444 if (pPageHead->fDirty)
2445 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2446#endif
2447
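 /* A physical handler is already registered for this guest page; just link the new page into the existing monitoring chain behind the head. */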
2448 pPage->iMonitoredPrev = pPageHead->idx;
2449 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2450 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2451 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2452 pPageHead->iMonitoredNext = pPage->idx;
2453 rc = VINF_SUCCESS;
2454 }
2455 else
2456 {
2457 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2458 PVM pVM = pPool->CTX_SUFF(pVM);
2459 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2460 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2461 GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK,
2462 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2463 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2464 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2465 pPool->pszAccessHandler);
2466 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2467 * the heap size should suffice. */
2468 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2469 PVMCPU pVCpu = VMMGetCpu(pVM);
2470 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2471 }
2472 pPage->fMonitored = true;
2473 return rc;
2474}
2475
2476
2477/**
2478 * Disables write monitoring of a guest page.
2479 *
2480 * @returns VBox status code.
2481 * @retval VINF_SUCCESS on success.
2482 * @param pPool The pool.
2483 * @param pPage The cached page.
2484 */
2485static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2486{
2487 /*
2488 * Filter out the relevant kinds.
2489 */
2490 switch (pPage->enmKind)
2491 {
2492 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2493 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2494 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2495 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2496 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2497 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2498 case PGMPOOLKIND_64BIT_PML4:
2499 case PGMPOOLKIND_32BIT_PD:
2500 case PGMPOOLKIND_PAE_PDPT:
2501 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2502 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2503 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2504 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2505 break;
2506
2507 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2508 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2509 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2510 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2511 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2512 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2513 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2514 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2515 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2516 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2517 case PGMPOOLKIND_ROOT_NESTED:
2518 case PGMPOOLKIND_PAE_PD_PHYS:
2519 case PGMPOOLKIND_PAE_PDPT_PHYS:
2520 case PGMPOOLKIND_32BIT_PD_PHYS:
2521 /* Nothing to monitor here. */
2522 Assert(!pPage->fMonitored);
2523 return VINF_SUCCESS;
2524
2525 default:
2526 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2527 }
2528 Assert(pPage->fMonitored);
2529
2530 /*
2531 * Remove the page from the monitored list or uninstall it if last.
2532 */
2533 const PVM pVM = pPool->CTX_SUFF(pVM);
2534 int rc;
2535 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2536 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2537 {
2538 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2539 {
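 /* This page is the chain head: promote the next page to head and re-point the physical handler's per-page user data at it. */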
2540 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2541 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2542 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2543 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2544 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2545 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2546 pPool->pszAccessHandler);
2547 AssertFatalRCSuccess(rc);
2548 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2549 }
2550 else
2551 {
2552 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2553 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2554 {
2555 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2556 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2557 }
2558 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2559 rc = VINF_SUCCESS;
2560 }
2561 }
2562 else
2563 {
2564 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2565 AssertFatalRC(rc);
2566 PVMCPU pVCpu = VMMGetCpu(pVM);
2567 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2568 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2569 }
2570 pPage->fMonitored = false;
2571
2572 /*
2573 * Remove it from the list of modified pages (if in it).
2574 */
2575 pgmPoolMonitorModifiedRemove(pPool, pPage);
2576
2577 return rc;
2578}
2579
2580
2581/**
2582 * Inserts the page into the list of modified pages.
2583 *
2584 * @param pPool The pool.
2585 * @param pPage The page.
2586 */
2587void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2588{
2589 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2590 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2591 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2592 && pPool->iModifiedHead != pPage->idx,
2593 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2594 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2595 pPool->iModifiedHead, pPool->cModifiedPages));
2596
2597 pPage->iModifiedNext = pPool->iModifiedHead;
2598 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2599 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2600 pPool->iModifiedHead = pPage->idx;
2601 pPool->cModifiedPages++;
2602#ifdef VBOX_WITH_STATISTICS
2603 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2604 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2605#endif
2606}
2607
2608
2609/**
2610 * Removes the page from the list of modified pages and resets the
2611 * modification counter.
2612 *
2613 * @param pPool The pool.
2614 * @param pPage The page which is believed to be in the list of modified pages.
2615 */
2616static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2617{
2618 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2619 if (pPool->iModifiedHead == pPage->idx)
2620 {
2621 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2622 pPool->iModifiedHead = pPage->iModifiedNext;
2623 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2624 {
2625 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2626 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2627 }
2628 pPool->cModifiedPages--;
2629 }
2630 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2631 {
2632 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2633 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2634 {
2635 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2636 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2637 }
2638 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2639 pPool->cModifiedPages--;
2640 }
2641 else
2642 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2643 pPage->cModifications = 0;
2644}
2645
2646
2647/**
2648 * Zaps the list of modified pages, resetting their modification counters in the process.
2649 *
2650 * @param pVM The VM handle.
2651 */
2652static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2653{
2654 pgmLock(pVM);
2655 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2656 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2657
2658 unsigned cPages = 0; NOREF(cPages);
2659
2660#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2661 pgmPoolResetDirtyPages(pVM);
2662#endif
2663
2664 uint16_t idx = pPool->iModifiedHead;
2665 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2666 while (idx != NIL_PGMPOOL_IDX)
2667 {
2668 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2669 idx = pPage->iModifiedNext;
2670 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2671 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2672 pPage->cModifications = 0;
2673 Assert(++cPages);
2674 }
2675 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2676 pPool->cModifiedPages = 0;
2677 pgmUnlock(pVM);
2678}
2679
2680
2681/**
2682 * Handles SyncCR3 pool tasks.
2683 *
2684 * @returns VBox status code.
2685 * @retval VINF_SUCCESS if successfully handled.
2686 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2687 * @param pVCpu The VMCPU handle.
2688 * @remark Should only be used when monitoring is available, thus placed in
2689 * the PGMPOOL_WITH_MONITORING #ifdef.
2690 */
2691int pgmPoolSyncCR3(PVMCPU pVCpu)
2692{
2693 PVM pVM = pVCpu->CTX_SUFF(pVM);
2694 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2695
2696 /*
2697 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2698 * Occasionally we will have to clear all the shadow page tables because we wanted
2699 * to monitor a page which was mapped by too many shadowed page tables. This operation
2700 * is sometimes referred to as a 'lightweight flush'.
2701 */
2702# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2703 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2704 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2705# else /* !IN_RING3 */
2706 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2707 {
2708 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2709 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2710
2711 /* Make sure all other VCPUs return to ring 3. */
2712 if (pVM->cCpus > 1)
2713 {
2714 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2715 PGM_INVL_ALL_VCPU_TLBS(pVM);
2716 }
2717 return VINF_PGM_SYNC_CR3;
2718 }
2719# endif /* !IN_RING3 */
2720 else
2721 {
2722 pgmPoolMonitorModifiedClearAll(pVM);
2723
2724 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2725 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2726 {
2727 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2728 return pgmPoolSyncCR3(pVCpu);
2729 }
2730 }
2731 return VINF_SUCCESS;
2732}
2733
2734
2735/**
2736 * Frees up at least one user entry.
2737 *
2738 * @returns VBox status code.
2739 * @retval VINF_SUCCESS if at least one user entry was freed.
2740 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2741 * @param pPool The pool.
2742 * @param iUser The user index.
2743 */
2744static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2745{
2746 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2747 /*
2748 * Just free cached pages in a braindead fashion.
2749 */
2750 /** @todo walk the age list backwards and free the first with usage. */
2751 int rc = VINF_SUCCESS;
2752 do
2753 {
2754 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2755 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2756 rc = rc2;
2757 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2758 return rc;
2759}
2760
2761
2762/**
2763 * Inserts a page into the cache.
2764 *
2765 * This will create user node for the page, insert it into the GCPhys
2766 * hash, and insert it into the age list.
2767 *
2768 * @returns VBox status code.
2769 * @retval VINF_SUCCESS if successfully added.
2770 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2771 * @param pPool The pool.
2772 * @param pPage The cached page.
2773 * @param GCPhys The GC physical address of the page we're gonna shadow.
2774 * @param iUser The user index.
2775 * @param iUserTable The user table index.
2776 */
2777DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2778{
2779 int rc = VINF_SUCCESS;
2780 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2781
2782 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2783
2784#ifdef VBOX_STRICT
2785 /*
2786 * Check that the entry doesn't already exist.
2787 */
2788 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2789 {
2790 uint16_t i = pPage->iUserHead;
2791 do
2792 {
2793 Assert(i < pPool->cMaxUsers);
2794 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2795 i = paUsers[i].iNext;
2796 } while (i != NIL_PGMPOOL_USER_INDEX);
2797 }
2798#endif
2799
2800 /*
2801 * Find a free user node.
2802 */
2803 uint16_t i = pPool->iUserFreeHead;
2804 if (i == NIL_PGMPOOL_USER_INDEX)
2805 {
2806 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2807 if (RT_FAILURE(rc))
2808 return rc;
2809 i = pPool->iUserFreeHead;
2810 }
2811
2812 /*
2813 * Unlink the user node from the free list,
2814 * initialize and insert it into the user list.
2815 */
2816 pPool->iUserFreeHead = paUsers[i].iNext;
2817 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2818 paUsers[i].iUser = iUser;
2819 paUsers[i].iUserTable = iUserTable;
2820 pPage->iUserHead = i;
2821
2822 /*
2823 * Insert into cache and enable monitoring of the guest page if enabled.
2824 *
2825 * Until we implement caching of all levels, including the CR3 one, we'll
2826 * have to make sure we don't try monitor & cache any recursive reuse of
2827 * a monitored CR3 page. Because all windows versions are doing this we'll
2828 * have to be able to do combined access monitoring, CR3 + PT and
2829 * PD + PT (guest PAE).
2830 *
2831 * Update:
2832 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2833 */
2834 const bool fCanBeMonitored = true;
2835 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2836 if (fCanBeMonitored)
2837 {
2838 rc = pgmPoolMonitorInsert(pPool, pPage);
2839 AssertRC(rc);
2840 }
2841 return rc;
2842}
2843
2844
2845/**
2846 * Adds a user reference to a page.
2847 *
2848 * This will move the page to the head of the
2849 *
2850 * @returns VBox status code.
2851 * @retval VINF_SUCCESS if successfully added.
2852 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2853 * @param pPool The pool.
2854 * @param pPage The cached page.
2855 * @param iUser The user index.
2856 * @param iUserTable The user table.
2857 */
2858static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2859{
2860 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2861
2862 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2863
2864# ifdef VBOX_STRICT
2865 /*
2866 * Check that the entry doesn't already exist. We only allow multiple
2867 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2868 */
2869 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2870 {
2871 uint16_t i = pPage->iUserHead;
2872 do
2873 {
2874 Assert(i < pPool->cMaxUsers);
2875 AssertMsg( iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3
2876 || paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2877 i = paUsers[i].iNext;
2878 } while (i != NIL_PGMPOOL_USER_INDEX);
2879 }
2880# endif
2881
2882 /*
2883 * Allocate a user node.
2884 */
2885 uint16_t i = pPool->iUserFreeHead;
2886 if (i == NIL_PGMPOOL_USER_INDEX)
2887 {
2888 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2889 if (RT_FAILURE(rc))
2890 return rc;
2891 i = pPool->iUserFreeHead;
2892 }
2893 pPool->iUserFreeHead = paUsers[i].iNext;
2894
2895 /*
2896 * Initialize the user node and insert it.
2897 */
2898 paUsers[i].iNext = pPage->iUserHead;
2899 paUsers[i].iUser = iUser;
2900 paUsers[i].iUserTable = iUserTable;
2901 pPage->iUserHead = i;
2902
2903# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2904 if (pPage->fDirty)
2905 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2906# endif
2907
2908 /*
2909 * Tell the cache to update its replacement stats for this page.
2910 */
2911 pgmPoolCacheUsed(pPool, pPage);
2912 return VINF_SUCCESS;
2913}
2914
2915
2916/**
2917 * Frees a user record associated with a page.
2918 *
2919 * This does not clear the entry in the user table, it simply returns the
2920 * user record to the chain of free records.
2921 *
2922 * @param pPool The pool.
2923 * @param pPage The shadow page.
2924 * @param iUser The shadow page pool index of the user table.
2925 * @param iUserTable The index into the user table (shadowed).
2926 */
2927static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2928{
2929 /*
2930 * Unlink and free the specified user entry.
2931 */
2932 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2933
2934 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2935 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2936 uint16_t i = pPage->iUserHead;
2937 if ( i != NIL_PGMPOOL_USER_INDEX
2938 && paUsers[i].iUser == iUser
2939 && paUsers[i].iUserTable == iUserTable)
2940 {
2941 pPage->iUserHead = paUsers[i].iNext;
2942
2943 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2944 paUsers[i].iNext = pPool->iUserFreeHead;
2945 pPool->iUserFreeHead = i;
2946 return;
2947 }
2948
2949 /* General: Linear search. */
2950 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2951 while (i != NIL_PGMPOOL_USER_INDEX)
2952 {
2953 if ( paUsers[i].iUser == iUser
2954 && paUsers[i].iUserTable == iUserTable)
2955 {
2956 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2957 paUsers[iPrev].iNext = paUsers[i].iNext;
2958 else
2959 pPage->iUserHead = paUsers[i].iNext;
2960
2961 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2962 paUsers[i].iNext = pPool->iUserFreeHead;
2963 pPool->iUserFreeHead = i;
2964 return;
2965 }
2966 iPrev = i;
2967 i = paUsers[i].iNext;
2968 }
2969
2970 /* Fatal: didn't find it */
2971 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
2972 iUser, iUserTable, pPage->GCPhys));
2973}
2974
2975
2976/**
2977 * Gets the entry size of a shadow table.
2978 *
2979 * @param enmKind The kind of page.
2980 *
2981 * @returns The size of the entry in bytes. That is, 4 or 8.
2982 * @returns If the kind is not for a table, an assertion is raised and 0 is
2983 * returned.
2984 */
2985DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2986{
2987 switch (enmKind)
2988 {
2989 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2990 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2991 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2992 case PGMPOOLKIND_32BIT_PD:
2993 case PGMPOOLKIND_32BIT_PD_PHYS:
2994 return 4;
2995
2996 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2997 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2998 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2999 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3000 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3001 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3002 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3003 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3004 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3005 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3006 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3007 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3008 case PGMPOOLKIND_64BIT_PML4:
3009 case PGMPOOLKIND_PAE_PDPT:
3010 case PGMPOOLKIND_ROOT_NESTED:
3011 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3012 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3013 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3014 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3015 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3016 case PGMPOOLKIND_PAE_PD_PHYS:
3017 case PGMPOOLKIND_PAE_PDPT_PHYS:
3018 return 8;
3019
3020 default:
3021 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3022 }
3023}
3024
3025
3026/**
3027 * Gets the entry size of a guest table.
3028 *
3029 * @param enmKind The kind of page.
3030 *
3031 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3032 * @returns If the kind is not for a table, an assertion is raised and 0 is
3033 * returned.
3034 */
3035DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3036{
3037 switch (enmKind)
3038 {
3039 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3040 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3041 case PGMPOOLKIND_32BIT_PD:
3042 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3043 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3044 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3045 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3046 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3047 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3048 return 4;
3049
3050 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3051 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3052 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3053 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3054 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3055 case PGMPOOLKIND_64BIT_PML4:
3056 case PGMPOOLKIND_PAE_PDPT:
3057 return 8;
3058
3059 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3060 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3061 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3062 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3063 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3064 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3065 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3066 case PGMPOOLKIND_ROOT_NESTED:
3067 case PGMPOOLKIND_PAE_PD_PHYS:
3068 case PGMPOOLKIND_PAE_PDPT_PHYS:
3069 case PGMPOOLKIND_32BIT_PD_PHYS:
3070 /** @todo can we return 0? (nobody is calling this...) */
3071 AssertFailed();
3072 return 0;
3073
3074 default:
3075 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3076 }
3077}
3078
3079
3080/**
3081 * Checks one shadow page table entry for a mapping of a physical page.
3082 *
3083 * @returns true / false indicating removal of all relevant PTEs
3084 *
3085 * @param pVM The VM handle.
3086 * @param pPhysPage The guest page in question.
3087 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3088 * @param iShw The shadow page table.
3089 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3090 */
3091static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3092{
3093 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3094 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3095 bool fRet = false;
3096
3097 /*
3098 * Assert sanity.
3099 */
3100 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3101 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3102 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3103
3104 /*
3105 * Then, clear the actual mappings to the page in the shadow PT.
3106 */
3107 switch (pPage->enmKind)
3108 {
3109 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3110 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3111 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3112 {
3113 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3114 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3115 uint32_t u32AndMask = 0;
3116 uint32_t u32OrMask = 0;
3117
3118 if (!fFlushPTEs)
3119 {
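 /* Where possible, keep the PTE and only adjust its write permission to match the current physical handler state; otherwise fall through and zap the entry below. */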
3120 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3121 {
3122 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3123 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3124 u32OrMask = X86_PTE_RW;
3125 u32AndMask = UINT32_MAX;
3126 fRet = true;
3127 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3128 break;
3129
3130 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3131 u32OrMask = 0;
3132 u32AndMask = ~X86_PTE_RW;
3133 fRet = true;
3134 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3135 break;
3136 default:
3137 /* (shouldn't be here, will assert below) */
3138 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3139 break;
3140 }
3141 }
3142 else
3143 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3144
3145 /* Update the counter if we're removing references. */
3146 if (!u32AndMask)
3147 {
3148 Assert(pPage->cPresent );
3149 Assert(pPool->cPresent);
3150 pPage->cPresent--;
3151 pPool->cPresent--;
3152 }
3153
3154 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3155 {
3156 X86PTE Pte;
3157
3158 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3159 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3160 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3161 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3162
3163 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3164 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3165 return fRet;
3166 }
3167#ifdef LOG_ENABLED
3168 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3169 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3170 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3171 {
3172 Log(("i=%d cFound=%d\n", i, ++cFound));
3173 }
3174#endif
3175 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3176 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3177 break;
3178 }
3179
3180 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3181 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3182 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3183 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3184 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3185 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3186 {
3187 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3188 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3189 uint64_t u64OrMask = 0;
3190 uint64_t u64AndMask = 0;
3191
3192 if (!fFlushPTEs)
3193 {
3194 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3195 {
3196 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3197 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3198 u64OrMask = X86_PTE_RW;
3199 u64AndMask = UINT64_MAX;
3200 fRet = true;
3201 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3202 break;
3203
3204 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3205 u64OrMask = 0;
3206 u64AndMask = ~(uint64_t)X86_PTE_RW;
3207 fRet = true;
3208 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3209 break;
3210
3211 default:
3212 /* (shouldn't be here, will assert below) */
3213 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3214 break;
3215 }
3216 }
3217 else
3218 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3219
3220 /* Update the counter if we're removing references. */
3221 if (!u64AndMask)
3222 {
3223 Assert(pPage->cPresent);
3224 Assert(pPool->cPresent);
3225 pPage->cPresent--;
3226 pPool->cPresent--;
3227 }
3228
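            /* Same convention as the 32-bit case above: a zero AND mask clears the
               entry outright, otherwise only the RW bit is adjusted. */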
3229 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3230 {
3231 X86PTEPAE Pte;
3232
3233 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3234 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3235 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3236 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3237
3238 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3239 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3240 return fRet;
3241 }
3242#ifdef LOG_ENABLED
3243 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3244 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3245 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3246 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3247 Log(("i=%d cFound=%d\n", i, ++cFound));
3248#endif
3249 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3250 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3251 break;
3252 }
3253
3254#ifdef PGM_WITH_LARGE_PAGES
3255 /* Large page case only. */
3256 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3257 {
3258 Assert(pVM->pgm.s.fNestedPaging);
3259
3260 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3261 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3262
3263 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3264 {
3265 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3266 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3267 pPD->a[iPte].u = 0;
3268 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3269
3270 /* Update the counter as we're removing references. */
3271 Assert(pPage->cPresent);
3272 Assert(pPool->cPresent);
3273 pPage->cPresent--;
3274 pPool->cPresent--;
3275
3276 return fRet;
3277 }
3278# ifdef LOG_ENABLED
3279 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3280 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3281 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3282 Log(("i=%d cFound=%d\n", i, ++cFound));
3283# endif
3284 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3285 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3286 break;
3287 }
3288
3289 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3290 case PGMPOOLKIND_PAE_PD_PHYS:
3291 {
3292 Assert(pVM->pgm.s.fNestedPaging);
3293
3294 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3295 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3296
3297 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3298 {
3299 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3300 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3301 pPD->a[iPte].u = 0;
3302 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3303
3304 /* Update the counter as we're removing references. */
3305 Assert(pPage->cPresent);
3306 Assert(pPool->cPresent);
3307 pPage->cPresent--;
3308 pPool->cPresent--;
3309 return fRet;
3310 }
3311# ifdef LOG_ENABLED
3312 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3313 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3314 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3315 Log(("i=%d cFound=%d\n", i, ++cFound));
3316# endif
3317 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3318 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3319 break;
3320 }
3321#endif /* PGM_WITH_LARGE_PAGES */
3322
3323 default:
3324 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3325 }
3326
3327 /* not reached. */
3328#ifndef _MSC_VER
3329 return fRet;
3330#endif
3331}
3332
3333
3334/**
3335 * Scans one shadow page table for mappings of a physical page.
3336 *
3337 * @param pVM The VM handle.
3338 * @param pPhysPage The guest page in question.
3339 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3340 * @param iShw The shadow page table.
3341 */
3342static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3343{
3344 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3345
3346    /* We should only come here when there's only one reference to this physical page. */
3347 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3348
3349 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3350 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3351 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3352 if (!fKeptPTEs)
3353 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3354 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3355}
3356
3357
3358/**
3359 * Flushes a list of shadow page tables mapping the same physical page.
3360 *
3361 * @param pVM The VM handle.
3362 * @param pPhysPage The guest page in question.
3363 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3364 * @param iPhysExt The physical cross reference extent list to flush.
3365 */
3366static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3367{
3368 PGM_LOCK_ASSERT_OWNER(pVM);
3369 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3370 bool fKeepList = false;
3371
3372 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3373    Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3374
3375 const uint16_t iPhysExtStart = iPhysExt;
3376 PPGMPOOLPHYSEXT pPhysExt;
3377 do
3378 {
3379 Assert(iPhysExt < pPool->cMaxPhysExts);
3380 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3381 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3382 {
3383 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3384 {
3385 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3386 if (!fKeptPTEs)
3387 {
3388 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3389 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3390 }
3391 else
3392 fKeepList = true;
3393 }
3394 }
3395 /* next */
3396 iPhysExt = pPhysExt->iNext;
3397 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3398
3399 if (!fKeepList)
3400 {
3401 /* insert the list into the free list and clear the ram range entry. */
3402 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3403 pPool->iPhysExtFreeHead = iPhysExtStart;
3404 /* Invalidate the tracking data. */
3405 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3406 }
3407
3408 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3409}
3410
3411
3412/**
3413 * Flushes all shadow page table mappings of the given guest page.
3414 *
3415 * This is typically called when the host page backing the guest one has been
3416 * replaced or when the page protection was changed due to a guest access
3417 * caught by the monitoring.
3418 *
3419 * @returns VBox status code.
3420 * @retval VINF_SUCCESS if all references have been successfully cleared.
3421 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3422 * pool cleaning. FF and sync flags are set.
3423 *
3424 * @param pVM The VM handle.
3425 * @param GCPhysPage GC physical address of the page in question
3426 * @param pPhysPage The guest page in question.
3427 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3428 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3429 * flushed; it is NOT touched if this isn't necessary.
3430 * The caller MUST initialize this to @a false.
3431 */
3432int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3433{
3434 PVMCPU pVCpu = VMMGetCpu(pVM);
3435 pgmLock(pVM);
3436 int rc = VINF_SUCCESS;
3437
3438#ifdef PGM_WITH_LARGE_PAGES
3439 /* Is this page part of a large page? */
3440 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3441 {
3442 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3443 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3444
3445 /* Fetch the large page base. */
3446 PPGMPAGE pLargePage;
3447 if (GCPhysBase != GCPhysPage)
3448 {
3449 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3450 AssertFatal(pLargePage);
3451 }
3452 else
3453 pLargePage = pPhysPage;
3454
3455 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3456
3457 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3458 {
3459 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3460 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3461 pVM->pgm.s.cLargePagesDisabled++;
3462
3463            /* Update the base as *only* that one has a reference and there's only one PDE to clear. */
3464 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3465
3466 *pfFlushTLBs = true;
3467 pgmUnlock(pVM);
3468 return rc;
3469 }
3470 }
3471#else
3472 NOREF(GCPhysPage);
3473#endif /* PGM_WITH_LARGE_PAGES */
3474
3475 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3476 if (u16)
3477 {
3478 /*
3479 * The zero page is currently screwing up the tracking and we'll
3480 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3481 * is defined, zero pages won't normally be mapped. Some kind of solution
3482 * will be needed for this problem of course, but it will have to wait...
3483 */
3484 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3485 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3486 rc = VINF_PGM_GCPHYS_ALIASED;
3487 else
3488 {
3489# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3490 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3491 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3492 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3493# endif
3494
3495 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3496 {
3497 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3498 pgmPoolTrackFlushGCPhysPT(pVM,
3499 pPhysPage,
3500 fFlushPTEs,
3501 PGMPOOL_TD_GET_IDX(u16));
3502 }
3503 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3504 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3505 else
3506 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3507 *pfFlushTLBs = true;
3508
3509# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3510 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3511# endif
3512 }
3513 }
3514
3515 if (rc == VINF_PGM_GCPHYS_ALIASED)
3516 {
3517 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3518 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3519 rc = VINF_PGM_SYNC_CR3;
3520 }
3521 pgmUnlock(pVM);
3522 return rc;
3523}
3524
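/*
 * Illustrative sketch (not part of the build): the calling contract for
 * pgmPoolTrackUpdateGCPhys.  The caller owns the TLB flush flag; the variable
 * names below are assumptions for illustration only.
 *
 * @code
 *      bool fFlushTLBs = false;                     // MUST be initialized to false by the caller
 *      int rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysPage, pPhysPage, true, &fFlushTLBs);
 *      if (fFlushTLBs)
 *          PGM_INVL_ALL_VCPU_TLBS(pVM);             // only set when a shadow TLB flush is actually needed
 *      // VINF_PGM_SYNC_CR3 means the FF and sync flags are already set; just pass rc up.
 * @endcode
 */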
3525
3526/**
3527 * Scans all shadow page tables for mappings of a physical page.
3528 *
3529 * This may be slow, but it's most likely more efficient than cleaning
3530 * out the entire page pool / cache.
3531 *
3532 * @returns VBox status code.
3533 * @retval VINF_SUCCESS if all references have been successfully cleared.
3534 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3535 * a page pool cleaning.
3536 *
3537 * @param pVM The VM handle.
3538 * @param pPhysPage The guest page in question.
3539 */
3540int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3541{
3542 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3543 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3544 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3545 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3546
3547 /*
3548 * There is a limit to what makes sense.
3549 */
3550 if ( pPool->cPresent > 1024
3551 && pVM->cCpus == 1)
3552 {
3553 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3554 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3555 return VINF_PGM_GCPHYS_ALIASED;
3556 }
3557
3558 /*
3559     * Iterate all the pages until we've encountered all those in use.
3560     * This is a simple but not quite optimal solution.
3561 */
3562 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3563 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3564 unsigned cLeft = pPool->cUsedPages;
3565 unsigned iPage = pPool->cCurPages;
3566 while (--iPage >= PGMPOOL_IDX_FIRST)
3567 {
3568 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3569 if ( pPage->GCPhys != NIL_RTGCPHYS
3570 && pPage->cPresent)
3571 {
3572 switch (pPage->enmKind)
3573 {
3574 /*
3575 * We only care about shadow page tables.
3576 */
3577 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3578 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3579 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3580 {
3581 unsigned cPresent = pPage->cPresent;
3582 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3583 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3584 if (pPT->a[i].n.u1Present)
3585 {
3586 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3587 {
3588 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3589 pPT->a[i].u = 0;
3590
3591 /* Update the counter as we're removing references. */
3592 Assert(pPage->cPresent);
3593 Assert(pPool->cPresent);
3594 pPage->cPresent--;
3595 pPool->cPresent--;
3596 }
3597 if (!--cPresent)
3598 break;
3599 }
3600 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3601 break;
3602 }
3603
3604 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3605 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3606 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3607 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3608 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3609 {
3610 unsigned cPresent = pPage->cPresent;
3611 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3612 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3613 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3614 {
3615 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3616 {
3617 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3618 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3619
3620 /* Update the counter as we're removing references. */
3621 Assert(pPage->cPresent);
3622 Assert(pPool->cPresent);
3623 pPage->cPresent--;
3624 pPool->cPresent--;
3625 }
3626 if (!--cPresent)
3627 break;
3628 }
3629 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3630 break;
3631 }
3632#ifndef IN_RC
3633 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3634 {
3635 unsigned cPresent = pPage->cPresent;
3636 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3637 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3638 if (pPT->a[i].n.u1Present)
3639 {
3640 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3641 {
3642 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3643 pPT->a[i].u = 0;
3644
3645 /* Update the counter as we're removing references. */
3646 Assert(pPage->cPresent);
3647 Assert(pPool->cPresent);
3648 pPage->cPresent--;
3649 pPool->cPresent--;
3650 }
3651 if (!--cPresent)
3652 break;
3653 }
3654 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3655 break;
3656 }
3657#endif
3658 }
3659 if (!--cLeft)
3660 break;
3661 }
3662 }
3663
3664 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3665 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3666
3667 /*
3668 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3669 */
3670 if (pPool->cPresent > 1024)
3671 {
3672 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3673 return VINF_PGM_GCPHYS_ALIASED;
3674 }
3675
3676 return VINF_SUCCESS;
3677}
3678
3679
3680/**
3681 * Clears the user entry in a user table.
3682 *
3683 * This is used to remove all references to a page when flushing it.
3684 */
3685static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3686{
3687 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3688 Assert(pUser->iUser < pPool->cCurPages);
3689 uint32_t iUserTable = pUser->iUserTable;
3690
3691 /*
3692 * Map the user page.
3693 */
3694 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3695 union
3696 {
3697 uint64_t *pau64;
3698 uint32_t *pau32;
3699 } u;
3700 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3701
3702 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3703
3704 /* Safety precaution in case we change the paging for other modes too in the future. */
3705 Assert(!pgmPoolIsPageLocked(pPage));
3706
3707#ifdef VBOX_STRICT
3708 /*
3709 * Some sanity checks.
3710 */
3711 switch (pUserPage->enmKind)
3712 {
3713 case PGMPOOLKIND_32BIT_PD:
3714 case PGMPOOLKIND_32BIT_PD_PHYS:
3715 Assert(iUserTable < X86_PG_ENTRIES);
3716 break;
3717 case PGMPOOLKIND_PAE_PDPT:
3718 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3719 case PGMPOOLKIND_PAE_PDPT_PHYS:
3720 Assert(iUserTable < 4);
3721 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3722 break;
3723 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3724 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3725 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3726 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3727 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3728 case PGMPOOLKIND_PAE_PD_PHYS:
3729 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3730 break;
3731 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3732 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3733 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3734 break;
3735 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3736 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3737 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3738 break;
3739 case PGMPOOLKIND_64BIT_PML4:
3740 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3741 /* GCPhys >> PAGE_SHIFT is the index here */
3742 break;
3743 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3744 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3745 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3746 break;
3747
3748 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3749 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3750 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3751 break;
3752
3753 case PGMPOOLKIND_ROOT_NESTED:
3754 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3755 break;
3756
3757 default:
3758 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3759 break;
3760 }
3761#endif /* VBOX_STRICT */
3762
3763 /*
3764 * Clear the entry in the user page.
3765 */
3766 switch (pUserPage->enmKind)
3767 {
3768 /* 32-bit entries */
3769 case PGMPOOLKIND_32BIT_PD:
3770 case PGMPOOLKIND_32BIT_PD_PHYS:
3771 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3772 break;
3773
3774 /* 64-bit entries */
3775 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3776 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3777 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3778 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3779 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3780#ifdef IN_RC
3781 /*
3782 * In 32 bits PAE mode we *must* invalidate the TLB when changing a
3783 * PDPT entry; the CPU fetches them only during cr3 load, so any
3784 * non-present PDPT will continue to cause page faults.
3785 */
3786 ASMReloadCR3();
3787 /* no break */
3788#endif
3789 case PGMPOOLKIND_PAE_PD_PHYS:
3790 case PGMPOOLKIND_PAE_PDPT_PHYS:
3791 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3792 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3793 case PGMPOOLKIND_64BIT_PML4:
3794 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3795 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3796 case PGMPOOLKIND_PAE_PDPT:
3797 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3798 case PGMPOOLKIND_ROOT_NESTED:
3799 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3800 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3801 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3802 break;
3803
3804 default:
3805 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3806 }
3807 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3808}
3809
3810
3811/**
3812 * Clears all users of a page.
3813 */
3814static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3815{
3816 /*
3817 * Free all the user records.
3818 */
3819 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3820
3821 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3822 uint16_t i = pPage->iUserHead;
3823 while (i != NIL_PGMPOOL_USER_INDEX)
3824 {
3825        /* Clear entry in user table. */
3826 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3827
3828 /* Free it. */
3829 const uint16_t iNext = paUsers[i].iNext;
3830 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3831 paUsers[i].iNext = pPool->iUserFreeHead;
3832 pPool->iUserFreeHead = i;
3833
3834 /* Next. */
3835 i = iNext;
3836 }
3837 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3838}
3839
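/*
 * Illustrative sketch (not part of the build): how the user records fit
 * together.  Each record names the parent shadow table (iUser, a pool page
 * index) and the entry within it (iUserTable); they are chained from
 * pPage->iUserHead and recycled through pPool->iUserFreeHead.
 *
 * @code
 *      PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
 *      for (uint16_t i = pPage->iUserHead; i != NIL_PGMPOOL_USER_INDEX; i = paUsers[i].iNext)
 *          Log(("referenced by pool page %d, entry %#x\n", paUsers[i].iUser, paUsers[i].iUserTable));
 * @endcode
 */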
3840
3841/**
3842 * Allocates a new physical cross reference extent.
3843 *
3844 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3845 * @param pVM The VM handle.
3846 * @param piPhysExt Where to store the phys ext index.
3847 */
3848PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3849{
3850 PGM_LOCK_ASSERT_OWNER(pVM);
3851 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3852 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3853 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3854 {
3855 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3856 return NULL;
3857 }
3858 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3859 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3860 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3861 *piPhysExt = iPhysExt;
3862 return pPhysExt;
3863}
3864
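/*
 * Usage note (illustrative, not part of the build): allocation fails when the
 * extent pool is exhausted, in which case callers fall back to the overflowed
 * tracking value, as pgmPoolTrackPhysExtAddref below does:
 *
 * @code
 *      uint16_t iPhysExt;
 *      PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
 *      if (!pPhysExt)
 *          u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
 * @endcode
 */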
3865
3866/**
3867 * Frees a physical cross reference extent.
3868 *
3869 * @param pVM The VM handle.
3870 * @param iPhysExt The extent to free.
3871 */
3872void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3873{
3874 PGM_LOCK_ASSERT_OWNER(pVM);
3875 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3876 Assert(iPhysExt < pPool->cMaxPhysExts);
3877 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3878 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3879 {
3880 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3881 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3882 }
3883 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3884 pPool->iPhysExtFreeHead = iPhysExt;
3885}
3886
3887
3888/**
3889 * Frees a list of physical cross reference extents.
3890 *
3891 * @param pVM The VM handle.
3892 * @param iPhysExt The index of the first extent in the list to free.
3893 */
3894void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3895{
3896 PGM_LOCK_ASSERT_OWNER(pVM);
3897 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3898
3899 const uint16_t iPhysExtStart = iPhysExt;
3900 PPGMPOOLPHYSEXT pPhysExt;
3901 do
3902 {
3903 Assert(iPhysExt < pPool->cMaxPhysExts);
3904 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3905 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3906 {
3907 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3908 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3909 }
3910
3911 /* next */
3912 iPhysExt = pPhysExt->iNext;
3913 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3914
3915 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3916 pPool->iPhysExtFreeHead = iPhysExtStart;
3917}
3918
3919
3920/**
3921 * Insert a reference into a list of physical cross reference extents.
3922 *
3923 * @returns The new tracking data for PGMPAGE.
3924 *
3925 * @param pVM The VM handle.
3926 * @param iPhysExt The physical extent index of the list head.
3927 * @param iShwPT The shadow page table index.
3928 * @param iPte Page table entry
3929 *
3930 */
3931static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3932{
3933 PGM_LOCK_ASSERT_OWNER(pVM);
3934 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3935 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3936
3937 /*
3938 * Special common cases.
3939 */
3940 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3941 {
3942 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3943 paPhysExts[iPhysExt].apte[1] = iPte;
3944 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3945 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3946 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3947 }
3948 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3949 {
3950 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3951 paPhysExts[iPhysExt].apte[2] = iPte;
3952 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3953 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3954 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3955 }
3956 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3957
3958 /*
3959 * General treatment.
3960 */
3961 const uint16_t iPhysExtStart = iPhysExt;
3962 unsigned cMax = 15;
3963 for (;;)
3964 {
3965 Assert(iPhysExt < pPool->cMaxPhysExts);
3966 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3967 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3968 {
3969 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3970 paPhysExts[iPhysExt].apte[i] = iPte;
3971 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3972 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3973 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3974 }
3975 if (!--cMax)
3976 {
3977 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
3978 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3979 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3980 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3981 }
3982
3983 /* advance */
3984 iPhysExt = paPhysExts[iPhysExt].iNext;
3985 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3986 break;
3987 }
3988
3989 /*
3990 * Add another extent to the list.
3991 */
3992 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3993 if (!pNew)
3994 {
3995 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
3996 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3997 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3998 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3999 }
4000 pNew->iNext = iPhysExtStart;
4001 pNew->aidx[0] = iShwPT;
4002 pNew->apte[0] = iPte;
4003 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4004 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4005}
4006
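/*
 * Illustrative note (not part of the build): each PGMPOOLPHYSEXT node holds up
 * to three (aidx, apte) reference pairs plus an iNext link to the next node in
 * the chain.  The insertion above only walks a bounded number of nodes (cMax);
 * once that budget is spent the whole chain is handed back to the free list and
 * the page falls back to the PGMPOOL_TD_IDX_OVERFLOWED tracking value.
 */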
4007
4008/**
4009 * Add a reference to guest physical page where extents are in use.
4010 *
4011 * @returns The new tracking data for PGMPAGE.
4012 *
4013 * @param pVM The VM handle.
4014 * @param pPhysPage Pointer to the aPages entry in the ram range.
4015 * @param u16 The ram range flags (top 16-bits).
4016 * @param iShwPT The shadow page table index.
4017 * @param iPte Page table entry
4018 */
4019uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4020{
4021 pgmLock(pVM);
4022 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4023 {
4024 /*
4025 * Convert to extent list.
4026 */
4027 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4028 uint16_t iPhysExt;
4029 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4030 if (pPhysExt)
4031 {
4032 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4033 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4034 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4035 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4036 pPhysExt->aidx[1] = iShwPT;
4037 pPhysExt->apte[1] = iPte;
4038 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4039 }
4040 else
4041 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4042 }
4043 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4044 {
4045 /*
4046 * Insert into the extent list.
4047 */
4048 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4049 }
4050 else
4051 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4052 pgmUnlock(pVM);
4053 return u16;
4054}
4055
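/*
 * Illustrative sketch (not part of the build): how the 16-bit tracking word
 * returned above is interpreted when read back out of the PGMPAGE entry, as
 * pgmPoolTrackUpdateGCPhys does further up.
 *
 * @code
 *      uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
 *      if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
 *      {   // single reference: PGMPOOL_TD_GET_IDX(u16) is the shadow page table index
 *      }
 *      else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
 *      {   // PGMPOOL_TD_GET_IDX(u16) is the head of a PGMPOOLPHYSEXT chain
 *      }
 *      else
 *      {   // too many references to track individually; a slow scan is required
 *      }
 * @endcode
 */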
4056
4057/**
4058 * Clear references to guest physical memory.
4059 *
4060 * @param pPool The pool.
4061 * @param pPage The page.
4062 * @param pPhysPage Pointer to the aPages entry in the ram range.
4063 * @param iPte Shadow PTE index
4064 */
4065void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4066{
4067 PVM pVM = pPool->CTX_SUFF(pVM);
4068 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4069 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4070
4071 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4072 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4073 {
4074 pgmLock(pVM);
4075
4076 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4077 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4078 do
4079 {
4080 Assert(iPhysExt < pPool->cMaxPhysExts);
4081
4082 /*
4083 * Look for the shadow page and check if it's all freed.
4084 */
4085 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4086 {
4087 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4088 && paPhysExts[iPhysExt].apte[i] == iPte)
4089 {
4090 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4091 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4092
4093 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4094 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4095 {
4096 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4097 pgmUnlock(pVM);
4098 return;
4099 }
4100
4101 /* we can free the node. */
4102 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4103 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4104 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4105 {
4106 /* lonely node */
4107 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4108 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4109 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4110 }
4111 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4112 {
4113 /* head */
4114 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4115 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4116 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4117 }
4118 else
4119 {
4120 /* in list */
4121 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4122 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4123 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4124 }
4125 iPhysExt = iPhysExtNext;
4126 pgmUnlock(pVM);
4127 return;
4128 }
4129 }
4130
4131 /* next */
4132 iPhysExtPrev = iPhysExt;
4133 iPhysExt = paPhysExts[iPhysExt].iNext;
4134 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4135
4136 pgmUnlock(pVM);
4137 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4138 }
4139 else /* nothing to do */
4140 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4141}
4142
4143/**
4144 * Clear references to guest physical memory.
4145 *
4146 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4147 * physical address is assumed to be correct, so the linear search can be
4148 * skipped and we can assert at an earlier point.
4149 *
4150 * @param pPool The pool.
4151 * @param pPage The page.
4152 * @param HCPhys The host physical address corresponding to the guest page.
4153 * @param GCPhys The guest physical address corresponding to HCPhys.
4154 * @param iPte Shadow PTE index
4155 */
4156static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4157{
4158 /*
4159 * Lookup the page and check if it checks out before derefing it.
4160 */
4161 PVM pVM = pPool->CTX_SUFF(pVM);
4162 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4163 if (pPhysPage)
4164 {
4165 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4166#ifdef LOG_ENABLED
4167 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4168 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4169#endif
4170 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4171 {
4172 Assert(pPage->cPresent);
4173 Assert(pPool->cPresent);
4174 pPage->cPresent--;
4175 pPool->cPresent--;
4176 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4177 return;
4178 }
4179
4180 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4181 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4182 }
4183 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4184}
4185
4186
4187/**
4188 * Clear references to guest physical memory.
4189 *
4190 * @param pPool The pool.
4191 * @param pPage The page.
4192 * @param HCPhys The host physical address corresponding to the guest page.
4193 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4194 * @param iPte Shadow PTE index
4195 */
4196void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4197{
4198 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4199
4200 /*
4201 * Try the hint first.
4202 */
4203 RTHCPHYS HCPhysHinted;
4204 PVM pVM = pPool->CTX_SUFF(pVM);
4205 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4206 if (pPhysPage)
4207 {
4208 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4209 Assert(HCPhysHinted);
4210 if (HCPhysHinted == HCPhys)
4211 {
4212 Assert(pPage->cPresent);
4213 Assert(pPool->cPresent);
4214 pPage->cPresent--;
4215 pPool->cPresent--;
4216 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4217 return;
4218 }
4219 }
4220 else
4221 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4222
4223 /*
4224 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4225 */
4226 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4227 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4228 while (pRam)
4229 {
4230 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4231 while (iPage-- > 0)
4232 {
4233 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4234 {
4235 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4236 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4237 Assert(pPage->cPresent);
4238 Assert(pPool->cPresent);
4239 pPage->cPresent--;
4240 pPool->cPresent--;
4241 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4242 return;
4243 }
4244 }
4245 pRam = pRam->CTX_SUFF(pNext);
4246 }
4247
4248 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4249}
4250
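/*
 * Note (illustrative): the hint is normally taken straight from the matching
 * guest PTE (e.g. pGstPT->a[i].u & X86_PTE_PG_MASK), as the pgmPoolTrackDerefPT*
 * helpers below do; the linear RAM range search above is only the last resort
 * when the guest table no longer points at the same host page.
 */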
4251
4252/**
4253 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4254 *
4255 * @param pPool The pool.
4256 * @param pPage The page.
4257 * @param pShwPT The shadow page table (mapping of the page).
4258 * @param pGstPT The guest page table.
4259 */
4260DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4261{
4262 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4263 {
4264 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4265 if (pShwPT->a[i].n.u1Present)
4266 {
4267 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4268 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4269 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4270 if (!pPage->cPresent)
4271 break;
4272 }
4273 }
4274}
4275
4276
4277/**
4278 * Clear references to guest physical memory in a PAE / 32-bit page table.
4279 *
4280 * @param pPool The pool.
4281 * @param pPage The page.
4282 * @param pShwPT The shadow page table (mapping of the page).
4283 * @param pGstPT The guest page table (just a half one).
4284 */
4285DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4286{
4287 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4288 {
4289 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4290 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4291 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4292 {
4293 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4294 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4295 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4296 if (!pPage->cPresent)
4297 break;
4298 }
4299 }
4300}
4301
4302
4303/**
4304 * Clear references to guest physical memory in a PAE / PAE page table.
4305 *
4306 * @param pPool The pool.
4307 * @param pPage The page.
4308 * @param pShwPT The shadow page table (mapping of the page).
4309 * @param pGstPT The guest page table.
4310 */
4311DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4312{
4313 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4314 {
4315 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4316 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4317 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4318 {
4319 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4320 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4321 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
4322 if (!pPage->cPresent)
4323 break;
4324 }
4325 }
4326}
4327
4328
4329/**
4330 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4331 *
4332 * @param pPool The pool.
4333 * @param pPage The page.
4334 * @param pShwPT The shadow page table (mapping of the page).
4335 */
4336DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4337{
4338 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4339 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4340 {
4341 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4342 if (pShwPT->a[i].n.u1Present)
4343 {
4344 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4345 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4346 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys, i);
4347 if (!pPage->cPresent)
4348 break;
4349 }
4350 }
4351}
4352
4353
4354/**
4355 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4356 *
4357 * @param pPool The pool.
4358 * @param pPage The page.
4359 * @param pShwPT The shadow page table (mapping of the page).
4360 */
4361DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4362{
4363 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4364 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4365 {
4366 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4367 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4368 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4369 {
4370 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4371 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4372 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys, i);
4373 if (!pPage->cPresent)
4374 break;
4375 }
4376 }
4377}
4378
4379
4380/**
4381 * Clear references to guest physical memory in an EPT page table.
4382 *
4383 * @param pPool The pool.
4384 * @param pPage The page.
4385 * @param pShwPT The shadow page table (mapping of the page).
4386 */
4387DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4388{
4389 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4390 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4391 {
4392 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4393 if (pShwPT->a[i].n.u1Present)
4394 {
4395 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4396 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4397 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys, i);
4398 if (!pPage->cPresent)
4399 break;
4400 }
4401 }
4402}
4403
4404
4405
4406/**
4407 * Clear references to shadowed pages in a 32-bit page directory.
4408 *
4409 * @param pPool The pool.
4410 * @param pPage The page.
4411 * @param pShwPD The shadow page directory (mapping of the page).
4412 */
4413DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4414{
4415 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4416 {
4417 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4418 if ( pShwPD->a[i].n.u1Present
4419 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4420 )
4421 {
4422 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4423 if (pSubPage)
4424 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4425 else
4426 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4427 }
4428 }
4429}
4430
4431/**
4432 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4433 *
4434 * @param pPool The pool.
4435 * @param pPage The page.
4436 * @param pShwPD The shadow page directory (mapping of the page).
4437 */
4438DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4439{
4440 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4441 {
4442 if ( pShwPD->a[i].n.u1Present
4443 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4444 {
4445#ifdef PGM_WITH_LARGE_PAGES
4446 if (pShwPD->a[i].b.u1Size)
4447 {
4448 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4449 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4450 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */, i);
4451 }
4452 else
4453#endif
4454 {
4455 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4456 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4457 if (pSubPage)
4458 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4459 else
4460 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4461 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4462 }
4463 }
4464 }
4465}
4466
4467/**
4468 * Clear references to shadowed pages in a PAE page directory pointer table.
4469 *
4470 * @param pPool The pool.
4471 * @param pPage The page.
4472 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4473 */
4474DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4475{
4476 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4477 {
4478 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4479 if ( pShwPDPT->a[i].n.u1Present
4480 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4481 )
4482 {
4483 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4484 if (pSubPage)
4485 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4486 else
4487 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4488 }
4489 }
4490}
4491
4492
4493/**
4494 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4495 *
4496 * @param pPool The pool.
4497 * @param pPage The page.
4498 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4499 */
4500DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4501{
4502 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4503 {
4504 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4505 if (pShwPDPT->a[i].n.u1Present)
4506 {
4507 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4508 if (pSubPage)
4509 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4510 else
4511 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4512 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4513 }
4514 }
4515}
4516
4517
4518/**
4519 * Clear references to shadowed pages in a 64-bit level 4 page table.
4520 *
4521 * @param pPool The pool.
4522 * @param pPage The page.
4523 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4524 */
4525DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4526{
4527 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4528 {
4529 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4530 if (pShwPML4->a[i].n.u1Present)
4531 {
4532 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4533 if (pSubPage)
4534 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4535 else
4536 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4537 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4538 }
4539 }
4540}
4541
4542
4543/**
4544 * Clear references to shadowed pages in an EPT page directory.
4545 *
4546 * @param pPool The pool.
4547 * @param pPage The page.
4548 * @param pShwPD The shadow page directory (mapping of the page).
4549 */
4550DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4551{
4552 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4553 {
4554 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4555 if (pShwPD->a[i].n.u1Present)
4556 {
4557#ifdef PGM_WITH_LARGE_PAGES
4558 if (pShwPD->a[i].b.u1Size)
4559 {
4560 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4561 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4562 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */, i);
4563 }
4564 else
4565#endif
4566 {
4567 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4568 if (pSubPage)
4569 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4570 else
4571 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4572 }
4573 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4574 }
4575 }
4576}
4577
4578
4579/**
4580 * Clear references to shadowed pages in an EPT page directory pointer table.
4581 *
4582 * @param pPool The pool.
4583 * @param pPage The page.
4584 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4585 */
4586DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4587{
4588 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4589 {
4590 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4591 if (pShwPDPT->a[i].n.u1Present)
4592 {
4593 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4594 if (pSubPage)
4595 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4596 else
4597 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4598 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4599 }
4600 }
4601}
4602
4603
4604/**
4605 * Clears all references made by this page.
4606 *
4607 * This includes other shadow pages and GC physical addresses.
4608 *
4609 * @param pPool The pool.
4610 * @param pPage The page.
4611 */
4612static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4613{
4614 /*
4615 * Map the shadow page and take action according to the page kind.
4616 */
4617 PVM pVM = pPool->CTX_SUFF(pVM);
4618 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4619 switch (pPage->enmKind)
4620 {
4621 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4622 {
4623 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4624 void *pvGst;
4625 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4626 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4627 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4628 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4629 break;
4630 }
4631
4632 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4633 {
4634 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4635 void *pvGst;
4636 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4637 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4638 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4639 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4640 break;
4641 }
4642
4643 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4644 {
4645 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4646 void *pvGst;
4647 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4648 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4649 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4650 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4651 break;
4652 }
4653
4654 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4655 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4656 {
4657 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4658 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4659 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4660 break;
4661 }
4662
4663 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4664 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4665 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4666 {
4667 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4668 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4669 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4670 break;
4671 }
4672
4673 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4674 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4675 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4676 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4677 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4678 case PGMPOOLKIND_PAE_PD_PHYS:
4679 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4680 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4681 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4682 break;
4683
4684 case PGMPOOLKIND_32BIT_PD_PHYS:
4685 case PGMPOOLKIND_32BIT_PD:
4686 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4687 break;
4688
4689 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4690 case PGMPOOLKIND_PAE_PDPT:
4691 case PGMPOOLKIND_PAE_PDPT_PHYS:
4692 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4693 break;
4694
4695 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4696 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4697 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4698 break;
4699
4700 case PGMPOOLKIND_64BIT_PML4:
4701 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4702 break;
4703
4704 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4705 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4706 break;
4707
4708 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4709 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4710 break;
4711
4712 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4713 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4714 break;
4715
4716 default:
4717 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4718 }
4719
4720    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4721 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4722 ASMMemZeroPage(pvShw);
4723 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4724 pPage->fZeroed = true;
4725 Assert(!pPage->cPresent);
4726 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4727}
4728
4729/**
4730 * Flushes a pool page.
4731 *
4732 * This moves the page to the free list after removing all user references to it.
4733 *
4734 * @returns VBox status code.
4735 * @retval VINF_SUCCESS on success.
4736 * @param pPool The pool.
4737 * @param pPage The shadow page.
4738 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4739 */
4740int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4741{
4742 PVM pVM = pPool->CTX_SUFF(pVM);
4743 bool fFlushRequired = false;
4744
4745 int rc = VINF_SUCCESS;
4746 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4747 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4748 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4749
4750 /*
4751 * Quietly reject any attempts at flushing any of the special root pages.
4752 */
4753 if (pPage->idx < PGMPOOL_IDX_FIRST)
4754 {
4755 AssertFailed(); /* can no longer happen */
4756 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4757 return VINF_SUCCESS;
4758 }
4759
4760 pgmLock(pVM);
4761
4762 /*
4763 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4764 */
4765 if (pgmPoolIsPageLocked(pPage))
4766 {
4767 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4768 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4769 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4770 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4771 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4772 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4773 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4774 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4775 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4776 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4777 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4778 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4779 pgmUnlock(pVM);
4780 return VINF_SUCCESS;
4781 }
4782
4783#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4784 /* Start a subset so we won't run out of mapping space. */
4785 PVMCPU pVCpu = VMMGetCpu(pVM);
4786 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4787#endif
4788
4789 /*
4790 * Mark the page as being in need of an ASMMemZeroPage().
4791 */
4792 pPage->fZeroed = false;
4793
4794#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4795 if (pPage->fDirty)
4796 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4797#endif
4798
4799 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4800 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4801 fFlushRequired = true;
4802
4803 /*
4804 * Clear the page.
4805 */
4806 pgmPoolTrackClearPageUsers(pPool, pPage);
4807 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4808 pgmPoolTrackDeref(pPool, pPage);
4809 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4810
4811 /*
4812 * Flush it from the cache.
4813 */
4814 pgmPoolCacheFlushPage(pPool, pPage);
4815
4816#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4817 /* Heavy stuff done. */
4818 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4819#endif
4820
4821 /*
4822     * Deregister the monitoring.
4823 */
4824 if (pPage->fMonitored)
4825 rc = pgmPoolMonitorFlush(pPool, pPage);
4826
4827 /*
4828 * Free the page.
4829 */
4830 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4831 pPage->iNext = pPool->iFreeHead;
4832 pPool->iFreeHead = pPage->idx;
4833 pPage->enmKind = PGMPOOLKIND_FREE;
4834 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4835 pPage->GCPhys = NIL_RTGCPHYS;
4836 pPage->fReusedFlushPending = false;
4837
4838 pPool->cUsedPages--;
4839
4840 /* Flush the TLBs of all VCPUs if required. */
4841 if ( fFlushRequired
4842 && fFlush)
4843 {
4844 PGM_INVL_ALL_VCPU_TLBS(pVM);
4845 }
4846
4847 pgmUnlock(pVM);
4848 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4849 return rc;
4850}
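/*
 * Note on the sequence above (added commentary, not part of the original source): the page is
 * first marked as needing ASMMemZeroPage, any optimized-dirty-page state is flushed, all user
 * references are cleared and dereferenced, the page is evicted from the cache, monitoring is
 * deregistered, and only then is it pushed onto the free list; the TLBs of all VCPUs are
 * invalidated only when the page still had users and the caller asked for it via fFlush.
 */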
4851
4852
4853/**
4854 * Frees a usage of a pool page.
4855 *
4856 * The caller is responsible for updating the user table so that it no longer
4857 * references the shadow page.
4858 *
4859 * @param pPool The pool.
4860 * @param pPage The shadow page.
4861 * @param iUser The shadow page pool index of the user table.
4862 * @param iUserTable The index into the user table (shadowed).
4863 */
4864void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4865{
4866 PVM pVM = pPool->CTX_SUFF(pVM);
4867
4868 STAM_PROFILE_START(&pPool->StatFree, a);
4869 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4870 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4871 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4872 pgmLock(pVM);
4873 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4874 if (!pPage->fCached)
4875 pgmPoolFlushPage(pPool, pPage);
4876 pgmUnlock(pVM);
4877 STAM_PROFILE_STOP(&pPool->StatFree, a);
4878}
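/*
 * Illustrative sketch (added commentary, not part of the original source): per the doxygen above
 * the caller must drop its own reference in the user table first.  A hypothetical caller freeing
 * a page table referenced from a shadow PAE page directory could look roughly like this, where
 * pVM, pUserPage (the pool page of the PD), pShwPage (the PT being freed) and iPd are placeholders:
 *
 *      PX86PDPAE pShwPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pUserPage);
 *      pShwPD->a[iPd].u = 0;                                          // the PD no longer points at the PT
 *      pgmPoolFreeByPage(pPool, pShwPage, pUserPage->idx, iPd);       // release the usage record
 */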
4879
4880
4881/**
4882 * Makes more free pages available, growing the pool or evicting a cached page as needed.
4883 *
4884 * @returns VBox status code.
4885 * @retval VINF_SUCCESS on success.
4886 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4887 *
4888 * @param pPool The pool.
4889 * @param enmKind Page table kind
4890 * @param iUser The user of the page.
4891 */
4892static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4893{
4894 PVM pVM = pPool->CTX_SUFF(pVM);
4895 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%d\n", iUser));
4896 NOREF(enmKind);
4897
4898 /*
4899 * If the pool isn't fully grown yet, expand it.
4900 */
4901 if ( pPool->cCurPages < pPool->cMaxPages
4902#if defined(IN_RC)
4903 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4904 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4905 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4906#endif
4907 )
4908 {
4909 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4910#ifdef IN_RING3
4911 int rc = PGMR3PoolGrow(pVM);
4912#else
4913 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4914#endif
4915 if (RT_FAILURE(rc))
4916 return rc;
4917 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4918 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4919 return VINF_SUCCESS;
4920 }
4921
4922 /*
4923 * Free one cached page.
4924 */
4925 return pgmPoolCacheFreeOne(pPool, iUser);
4926}
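/*
 * Added commentary (not part of the original source): pgmPoolMakeMoreFreePages either grows the
 * pool (directly in ring-3, via a VMMCALLRING3_PGM_POOL_GROW ring-3 call in ring-0/raw-mode) or,
 * failing that, evicts one cached page through pgmPoolCacheFreeOne so the allocator can progress.
 */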
4927
4928/**
4929 * Allocates a page from the pool.
4930 *
4931 * This page may actually be a cached page and not in need of any processing
4932 * on the caller's part.
4933 *
4934 * @returns VBox status code.
4935 * @retval VINF_SUCCESS if a NEW page was allocated.
4936 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4937 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4938 *
4939 * @param pVM The VM handle.
4940 * @param GCPhys The GC physical address of the page we're going to shadow.
4941 * For 4MB and 2MB PD entries, it's the first address the
4942 * shadow PT is covering.
4943 * @param enmKind The kind of mapping.
4944 * @param enmAccess Access type for the mapping (only relevant for big pages)
4945 * @param iUser The shadow page pool index of the user table.
4946 * @param iUserTable The index into the user table (shadowed).
4947 * @param fLockPage Lock the page
4948 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4949 */
4950int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable,
4951 bool fLockPage, PPPGMPOOLPAGE ppPage)
4952{
4953 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4954 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4955 LogFlow(("pgmPoolAllocEx: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4956 *ppPage = NULL;
4957 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4958 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4959 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4960
4961 pgmLock(pVM);
4962
4963 if (pPool->fCacheEnabled)
4964 {
4965 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4966 if (RT_SUCCESS(rc2))
4967 {
4968 if (fLockPage)
4969 pgmPoolLockPage(pPool, *ppPage);
4970 pgmUnlock(pVM);
4971 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4972 LogFlow(("pgmPoolAllocEx: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4973 return rc2;
4974 }
4975 }
4976
4977 /*
4978 * Allocate a new one.
4979 */
4980 int rc = VINF_SUCCESS;
4981 uint16_t iNew = pPool->iFreeHead;
4982 if (iNew == NIL_PGMPOOL_IDX)
4983 {
4984 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4985 if (RT_FAILURE(rc))
4986 {
4987 pgmUnlock(pVM);
4988 Log(("pgmPoolAllocEx: returns %Rrc (Free)\n", rc));
4989 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4990 return rc;
4991 }
4992 iNew = pPool->iFreeHead;
4993 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
4994 }
4995
4996 /* unlink the free head */
4997 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4998 pPool->iFreeHead = pPage->iNext;
4999 pPage->iNext = NIL_PGMPOOL_IDX;
5000
5001 /*
5002 * Initialize it.
5003 */
5004 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5005 pPage->enmKind = enmKind;
5006 pPage->enmAccess = enmAccess;
5007 pPage->GCPhys = GCPhys;
5008 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5009 pPage->fMonitored = false;
5010 pPage->fCached = false;
5011#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5012 pPage->fDirty = false;
5013#endif
5014 pPage->fReusedFlushPending = false;
5015 pPage->cModifications = 0;
5016 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5017 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5018 pPage->cLocked = 0;
5019 pPage->cPresent = 0;
5020 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5021 pPage->pvLastAccessHandlerFault = 0;
5022 pPage->cLastAccessHandlerCount = 0;
5023 pPage->pvLastAccessHandlerRip = 0;
5024
5025 /*
5026 * Insert into the tracking and cache. If this fails, free the page.
5027 */
5028 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5029 if (RT_FAILURE(rc3))
5030 {
5031 pPool->cUsedPages--;
5032 pPage->enmKind = PGMPOOLKIND_FREE;
5033 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5034 pPage->GCPhys = NIL_RTGCPHYS;
5035 pPage->iNext = pPool->iFreeHead;
5036 pPool->iFreeHead = pPage->idx;
5037 pgmUnlock(pVM);
5038 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5039 Log(("pgmPoolAllocEx: returns %Rrc (Insert)\n", rc3));
5040 return rc3;
5041 }
5042
5043 /*
5044 * Commit the allocation, clear the page and return.
5045 */
5046#ifdef VBOX_WITH_STATISTICS
5047 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5048 pPool->cUsedPagesHigh = pPool->cUsedPages;
5049#endif
5050
5051 if (!pPage->fZeroed)
5052 {
5053 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5054 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5055 ASMMemZeroPage(pv);
5056 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5057 }
5058
5059 *ppPage = pPage;
5060 if (fLockPage)
5061 pgmPoolLockPage(pPool, pPage);
5062 pgmUnlock(pVM);
5063 LogFlow(("pgmPoolAllocEx: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5064 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5065 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5066 return rc;
5067}
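/*
 * Illustrative sketch (added commentary, not part of the original source): a hypothetical caller
 * shadowing a guest PAE page table might drive the allocator roughly like this; pVM, GCPhysPT,
 * pUserPage and iPd are placeholders and the final 'false' is fLockPage:
 *
 *      PPGMPOOLPAGE pShwPage;
 *      int rc = pgmPoolAllocEx(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
 *                              pUserPage->idx, iPd, false, &pShwPage);
 *      if (RT_SUCCESS(rc))    // VINF_SUCCESS for a new page, VINF_PGM_CACHED_PAGE for a cached one
 *      {
 *          // point the shadow PD entry at pShwPage->Core.Key (the HC physical address of the page)
 *      }
 */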
5068
5069
5070/**
5071 * Frees a usage of a pool page.
5072 *
5073 * @param pVM The VM handle.
5074 * @param HCPhys The HC physical address of the shadow page.
5075 * @param iUser The shadow page pool index of the user table.
5076 * @param iUserTable The index into the user table (shadowed).
5077 */
5078void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5079{
5080 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5081 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5082 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5083}
5084
5085/**
5086 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5087 *
5088 * @returns Pointer to the shadow page structure.
5089 * @param pPool The pool.
5090 * @param HCPhys The HC physical address of the shadow page.
5091 */
5092PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5093{
5094 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5095
5096 /*
5097 * Look up the page.
5098 */
5099 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5100
5101 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5102 return pPage;
5103}
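/*
 * Illustrative sketch (added commentary, not part of the original source): because the AVL tree is
 * keyed on the page aligned HC physical address, a hypothetical caller can go from a shadow PAE
 * PDE (Pde is a placeholder) back to the pool page of the page table it points to like this:
 *
 *      PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, Pde.u & X86_PDE_PAE_PG_MASK);
 */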
5104
5105
5106/**
5107 * Internal worker for finding a page for debugging purposes, no assertions.
5108 *
5109 * @returns Pointer to the shadow page structure. NULL if not found.
5110 * @param pPool The pool.
5111 * @param HCPhys The HC physical address of the shadow page.
5112 */
5113PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5114{
5115 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5116 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5117}
5118
5119
5120#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5121/**
5122 * Flushes the specified page if present.
5123 *
5124 * @param pVM The VM handle.
5125 * @param GCPhys Guest physical address of the page to flush
5126 */
5127void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5128{
5129 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5130
5131 VM_ASSERT_EMT(pVM);
5132
5133 /*
5134 * Look up the GCPhys in the hash.
5135 */
5136 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5137 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5138 if (i == NIL_PGMPOOL_IDX)
5139 return;
5140
5141 do
5142 {
5143 PPGMPOOLPAGE pPage = &pPool->aPages[i];
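        /* Added commentary: the unsigned subtraction below is true exactly when pPage->GCPhys
           lies within the page starting at GCPhys (GCPhys was aligned down above). */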
5144 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5145 {
5146 switch (pPage->enmKind)
5147 {
5148 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5149 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5150 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5151 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5152 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5153 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5154 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5155 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5156 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5157 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5158 case PGMPOOLKIND_64BIT_PML4:
5159 case PGMPOOLKIND_32BIT_PD:
5160 case PGMPOOLKIND_PAE_PDPT:
5161 {
5162 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5163#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5164 if (pPage->fDirty)
5165 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5166 else
5167#endif
5168 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5169 Assert(!pgmPoolIsPageLocked(pPage));
5170 pgmPoolMonitorChainFlush(pPool, pPage);
5171 return;
5172 }
5173
5174 /* ignore, no monitoring. */
5175 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5176 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5177 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5178 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5179 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5180 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5181 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5182 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5183 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5184 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5185 case PGMPOOLKIND_ROOT_NESTED:
5186 case PGMPOOLKIND_PAE_PD_PHYS:
5187 case PGMPOOLKIND_PAE_PDPT_PHYS:
5188 case PGMPOOLKIND_32BIT_PD_PHYS:
5189 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5190 break;
5191
5192 default:
5193 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5194 }
5195 }
5196
5197 /* next */
5198 i = pPage->iNext;
5199 } while (i != NIL_PGMPOOL_IDX);
5200 return;
5201}
5202#endif /* IN_RING3 */
5203
5204#ifdef IN_RING3
5205
5206
5207/**
5208 * Reset CPU on hot plugging.
5209 *
5210 * @param pVM The VM handle.
5211 * @param pVCpu The virtual CPU.
5212 */
5213void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5214{
5215 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5216
5217 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5218 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5219 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5220}
5221
5222
5223/**
5224 * Flushes the entire cache.
5225 *
5226 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5227 * this and will execute the CR3 flush.
5228 *
5229 * @param pVM The VM handle.
5230 */
5231void pgmR3PoolReset(PVM pVM)
5232{
5233 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5234
5235 PGM_LOCK_ASSERT_OWNER(pVM);
5236 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5237 LogFlow(("pgmR3PoolReset:\n"));
5238
5239 /*
5240 * If there are no pages in the pool, there is nothing to do.
5241 */
5242 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5243 {
5244 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5245 return;
5246 }
5247
5248 /*
5249 * Exit the shadow mode since we're going to clear everything,
5250 * including the root page.
5251 */
5252 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5253 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5254
5255 /*
5256 * Nuke the free list and reinsert all pages into it.
5257 */
5258 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5259 {
5260 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5261
5262 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5263 if (pPage->fMonitored)
5264 pgmPoolMonitorFlush(pPool, pPage);
5265 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5266 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5267 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5268 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5269 pPage->cModifications = 0;
5270 pPage->GCPhys = NIL_RTGCPHYS;
5271 pPage->enmKind = PGMPOOLKIND_FREE;
5272 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5273 Assert(pPage->idx == i);
5274 pPage->iNext = i + 1;
5275 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5276 pPage->fSeenNonGlobal = false;
5277 pPage->fMonitored = false;
5278#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5279 pPage->fDirty = false;
5280#endif
5281 pPage->fCached = false;
5282 pPage->fReusedFlushPending = false;
5283 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5284 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5285 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5286 pPage->cLocked = 0;
5287 }
5288 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5289 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5290 pPool->cUsedPages = 0;
5291
5292 /*
5293 * Zap and reinitialize the user records.
5294 */
5295 pPool->cPresent = 0;
5296 pPool->iUserFreeHead = 0;
5297 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5298 const unsigned cMaxUsers = pPool->cMaxUsers;
5299 for (unsigned i = 0; i < cMaxUsers; i++)
5300 {
5301 paUsers[i].iNext = i + 1;
5302 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5303 paUsers[i].iUserTable = 0xfffffffe;
5304 }
5305 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5306
5307 /*
5308 * Clear all the GCPhys links and rebuild the phys ext free list.
5309 */
5310 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5311 pRam;
5312 pRam = pRam->CTX_SUFF(pNext))
5313 {
5314 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5315 while (iPage-- > 0)
5316 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5317 }
5318
5319 pPool->iPhysExtFreeHead = 0;
5320 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5321 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5322 for (unsigned i = 0; i < cMaxPhysExts; i++)
5323 {
5324 paPhysExts[i].iNext = i + 1;
5325 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5326 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5327 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5328 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5329 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5330 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5331 }
5332 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5333
5334 /*
5335 * Just zap the modified list.
5336 */
5337 pPool->cModifiedPages = 0;
5338 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5339
5340 /*
5341 * Clear the GCPhys hash and the age list.
5342 */
5343 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5344 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5345 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5346 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5347
5348#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5349 /* Clear all dirty pages. */
5350 pPool->idxFreeDirtyPage = 0;
5351 pPool->cDirtyPages = 0;
5352 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5353 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5354#endif
5355
5356 /*
5357 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5358 */
5359 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5360 {
5361 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5362 pPage->iNext = NIL_PGMPOOL_IDX;
5363 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5364 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5365 pPage->cModifications = 0;
5366 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5367 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5368 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5369 if (pPage->fMonitored)
5370 {
5371 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
5372 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5373 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5374 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5375 pPool->pszAccessHandler);
5376 AssertFatalRCSuccess(rc);
5377 pgmPoolHashInsert(pPool, pPage);
5378 }
5379 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5380 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5381 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5382 }
5383
5384 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5385 {
5386 /*
5387 * Re-enter the shadowing mode and assert Sync CR3 FF.
5388 */
5389 PVMCPU pVCpu = &pVM->aCpus[i];
5390 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5391 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5392 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5393 }
5394
5395 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5396}
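/*
 * Added commentary (not part of the original source): when pgmR3PoolReset returns, every VCPU has
 * VMCPU_FF_PGM_SYNC_CR3 and VMCPU_FF_TLB_FLUSH pending, so the caller is expected to let the next
 * CR3 sync rebuild the shadow paging structures from the now empty pool.
 */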
5397#endif /* IN_RING3 */
5398
5399#ifdef LOG_ENABLED
5400static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5401{
5402 switch(enmKind)
5403 {
5404 case PGMPOOLKIND_INVALID:
5405 return "PGMPOOLKIND_INVALID";
5406 case PGMPOOLKIND_FREE:
5407 return "PGMPOOLKIND_FREE";
5408 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5409 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5410 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5411 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5412 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5413 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5414 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5415 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5416 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5417 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5418 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5419 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5420 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5421 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5422 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5423 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5424 case PGMPOOLKIND_32BIT_PD:
5425 return "PGMPOOLKIND_32BIT_PD";
5426 case PGMPOOLKIND_32BIT_PD_PHYS:
5427 return "PGMPOOLKIND_32BIT_PD_PHYS";
5428 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5429 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5430 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5431 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5432 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5433 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5434 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5435 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5436 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5437 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5438 case PGMPOOLKIND_PAE_PD_PHYS:
5439 return "PGMPOOLKIND_PAE_PD_PHYS";
5440 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5441 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5442 case PGMPOOLKIND_PAE_PDPT:
5443 return "PGMPOOLKIND_PAE_PDPT";
5444 case PGMPOOLKIND_PAE_PDPT_PHYS:
5445 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5446 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5447 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5448 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5449 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5450 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5451 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5452 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5453 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5454 case PGMPOOLKIND_64BIT_PML4:
5455 return "PGMPOOLKIND_64BIT_PML4";
5456 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5457 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5458 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5459 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5460 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5461 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5462 case PGMPOOLKIND_ROOT_NESTED:
5463 return "PGMPOOLKIND_ROOT_NESTED";
5464 }
5465 return "Unknown kind!";
5466}
5467#endif /* LOG_ENABLED */