VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 32390

Last change on this file since 32390 was 32390, checked in by vboxsync, 14 years ago

One more

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 208.4 KB
1/* $Id: PGMAllPool.cpp 32390 2010-09-10 11:18:03Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/pgm.h>
24#include <VBox/mm.h>
25#include <VBox/em.h>
26#include <VBox/cpum.h>
27#ifdef IN_RC
28# include <VBox/patm.h>
29#endif
30#include "../PGMInternal.h"
31#include <VBox/vm.h>
32#include "../PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/hwacc_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
49DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#ifndef IN_RING3
54DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
55#endif
56#ifdef LOG_ENABLED
57static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
58#endif
59#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
60static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
61#endif
62
63int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
64PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
65void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
66void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
67
68RT_C_DECLS_END
69
70
71/**
72 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
73 *
74 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
75 * @param enmKind The page kind.
76 */
77DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
78{
79 switch (enmKind)
80 {
81 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
83 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
84 return true;
85 default:
86 return false;
87 }
88}
89
90
91/**
92 * Flushes a chain of pages sharing the same access monitor.
93 *
94 * @returns VBox status code suitable for scheduling.
95 * @param pPool The pool.
96 * @param pPage A page in the chain.
97 * @todo VBOXSTRICTRC
98 */
99int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
100{
101 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
102
103 /*
104 * Find the list head.
105 */
106 uint16_t idx = pPage->idx;
107 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
108 {
109 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
110 {
111 idx = pPage->iMonitoredPrev;
112 Assert(idx != pPage->idx);
113 pPage = &pPool->aPages[idx];
114 }
115 }
116
117 /*
118 * Iterate the list flushing each shadow page.
119 */
120 int rc = VINF_SUCCESS;
121 for (;;)
122 {
123 idx = pPage->iMonitoredNext;
124 Assert(idx != pPage->idx);
125 if (pPage->idx >= PGMPOOL_IDX_FIRST)
126 {
127 int rc2 = pgmPoolFlushPage(pPool, pPage);
128 AssertRC(rc2);
129 }
130 /* next */
131 if (idx == NIL_PGMPOOL_IDX)
132 break;
133 pPage = &pPool->aPages[idx];
134 }
135 return rc;
136}
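/*
 * Note: iMonitoredNext is sampled before pgmPoolFlushPage() is invoked,
 * presumably because flushing also unlinks the page from the monitoring
 * chain and would otherwise invalidate the 'next' link being followed.
 */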
137
138
139/**
140 * Wrapper for reading the guest entry being modified: uses the current (host) context pointer in ring-3 and the guest physical address in R0/RC.
141 *
142 * @returns VBox status code suitable for scheduling.
143 * @param pVM VM Handle.
144 * @param pvDst Destination address
145 * @param pvSrc Source guest virtual address.
146 * @param GCPhysSrc The source guest physical address.
147 * @param cb Size of data to read
148 */
149DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
150{
151#if defined(IN_RING3)
152 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
153 return VINF_SUCCESS;
154#else
155 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
156 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
157#endif
158}
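/*
 * Illustrative sketch of the alignment masks above (hypothetical values;
 * cb is assumed to be a power of two, as it is for the X86PTE/X86PTEPAE
 * callers in this file):
 *
 *     size_t    cb    = sizeof(X86PTEPAE);            // 8 bytes
 *     uintptr_t uSrc  = 0x7fff00001234;               // hypothetical source
 *     uintptr_t uRead = uSrc & ~(uintptr_t)(cb - 1);  // -> 0x7fff00001230
 *
 * i.e. the read is aligned down so the whole entry is fetched in one piece.
 */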
159
160/**
161 * Process shadow entries before they are changed by the guest.
162 *
163 * For PT entries we will clear them. For PD entries, we'll simply check
164 * for mapping conflicts and set the SyncCR3 FF if found.
165 *
166 * @param pVCpu VMCPU handle
167 * @param pPool The pool.
168 * @param pPage The head page.
169 * @param GCPhysFault The guest physical fault address.
170 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
171 * In R3 this is the host context 'fault' address.
172 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
173 */
174void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
175{
176 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
177 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
178 PVM pVM = pPool->CTX_SUFF(pVM);
179
180 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
181
182 for (;;)
183 {
184 union
185 {
186 void *pv;
187 PX86PT pPT;
188 PPGMSHWPTPAE pPTPae;
189 PX86PD pPD;
190 PX86PDPAE pPDPae;
191 PX86PDPT pPDPT;
192 PX86PML4 pPML4;
193 } uShw;
194
195 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
196
197 uShw.pv = NULL;
198 switch (pPage->enmKind)
199 {
200 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
201 {
202 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
203 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
204 const unsigned iShw = off / sizeof(X86PTE);
205 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
206 if (uShw.pPT->a[iShw].n.u1Present)
207 {
208 X86PTE GstPte;
209
210 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
211 AssertRC(rc);
212 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
213 pgmPoolTracDerefGCPhysHint(pPool, pPage,
214 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
215 GstPte.u & X86_PTE_PG_MASK,
216 iShw);
217 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
218 }
219 break;
220 }
221
222 /* page/2 sized */
223 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
224 {
225 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
226 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
227 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
228 {
229 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
230 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
231 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
232 {
233 X86PTE GstPte;
234 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
235 AssertRC(rc);
236
237 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PG_MASK));
238 pgmPoolTracDerefGCPhysHint(pPool, pPage,
239 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
240 GstPte.u & X86_PTE_PG_MASK,
241 iShw);
242 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
243 }
244 }
245 break;
246 }
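/*
 * Rough reading of the half-page check above (illustrative numbers): a 32-bit
 * guest PT holds 1024 4-byte entries while a PAE shadow PT holds 512 8-byte
 * entries, so each shadow page covers one PAGE_SIZE/2 half of the guest PT
 * and pPage->GCPhys records which half.  For a write at off = 0x808:
 *
 *     (off ^ pPage->GCPhys) & (PAGE_SIZE / 2)  -> zero only for the shadow of the upper half
 *     iShw = (0x808 / sizeof(X86PTE)) & 511    -> 514 & 511 = 2
 *
 * so only the shadow covering that half clears its entry #2.
 */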
247
248 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
249 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
250 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
251 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
252 {
253 unsigned iGst = off / sizeof(X86PDE);
254 unsigned iShwPdpt = iGst / 256;
255 unsigned iShw = (iGst % 256) * 2;
256 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
257
258 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
259 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
260 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
261 {
262 for (unsigned i = 0; i < 2; i++)
263 {
264# ifndef IN_RING0
265 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
266 {
267 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
268 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
269 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
270 break;
271 }
272 else
273# endif /* !IN_RING0 */
274 if (uShw.pPDPae->a[iShw+i].n.u1Present)
275 {
276 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
277 pgmPoolFree(pVM,
278 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
279 pPage->idx,
280 iShw + i);
281 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
282 }
283
284 /* paranoia / a bit assumptive. */
285 if ( (off & 3)
286 && (off & 3) + cbWrite > 4)
287 {
288 const unsigned iShw2 = iShw + 2 + i;
289 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
290 {
291# ifndef IN_RING0
292 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
293 {
294 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
295 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
296 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
297 break;
298 }
299 else
300# endif /* !IN_RING0 */
301 if (uShw.pPDPae->a[iShw2].n.u1Present)
302 {
303 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
304 pgmPoolFree(pVM,
305 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
306 pPage->idx,
307 iShw2);
308 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
309 }
310 }
311 }
312 }
313 }
314 break;
315 }
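/*
 * Worked example for the index math above (illustrative): a 32-bit guest PD
 * holds 1024 4-byte PDEs, shadowed by four PAE PDs of 512 entries each, and
 * every guest PDE maps onto two PAE PDEs.  For a write at off = 0x804:
 *
 *     iGst     = 0x804 / sizeof(X86PDE) = 513
 *     iShwPdpt = 513 / 256              = 2    (i.e. the PD2_FOR_32BIT_PD shadow)
 *     iShw     = (513 % 256) * 2        = 2    (entries 2 and 3 via the i = 0..1 loop)
 *
 * and only the shadow page whose kind matches iShwPdpt acts on the write.
 */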
316
317 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
318 {
319 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
320 const unsigned iShw = off / sizeof(X86PTEPAE);
321 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
322 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
323 {
324 X86PTEPAE GstPte;
325 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
326 AssertRC(rc);
327
328 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
329 pgmPoolTracDerefGCPhysHint(pPool, pPage,
330 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
331 GstPte.u & X86_PTE_PAE_PG_MASK,
332 iShw);
333 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
334 }
335
336 /* paranoia / a bit assumptive. */
337 if ( (off & 7)
338 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
339 {
340 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
341 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
342
343 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
344 {
345 X86PTEPAE GstPte;
346# ifdef IN_RING3
347 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
348# else
349 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
350# endif
351 AssertRC(rc);
352 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
353 pgmPoolTracDerefGCPhysHint(pPool, pPage,
354 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
355 GstPte.u & X86_PTE_PAE_PG_MASK,
356 iShw2);
357 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
358 }
359 }
360 break;
361 }
362
363 case PGMPOOLKIND_32BIT_PD:
364 {
365 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
366 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
367
368 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
369 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
370# ifndef IN_RING0
371 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
372 {
373 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
374 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
375 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
376 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
377 break;
378 }
379# endif /* !IN_RING0 */
380# ifndef IN_RING0
381 else
382# endif /* !IN_RING0 */
383 {
384 if (uShw.pPD->a[iShw].n.u1Present)
385 {
386 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
387 pgmPoolFree(pVM,
388 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
389 pPage->idx,
390 iShw);
391 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
392 }
393 }
394 /* paranoia / a bit assumptive. */
395 if ( (off & 3)
396 && (off & 3) + cbWrite > sizeof(X86PTE))
397 {
398 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
399 if ( iShw2 != iShw
400 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
401 {
402# ifndef IN_RING0
403 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
404 {
405 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
406 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
407 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
408 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
409 break;
410 }
411# endif /* !IN_RING0 */
412# ifndef IN_RING0
413 else
414# endif /* !IN_RING0 */
415 {
416 if (uShw.pPD->a[iShw2].n.u1Present)
417 {
418 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
419 pgmPoolFree(pVM,
420 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
421 pPage->idx,
422 iShw2);
423 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
424 }
425 }
426 }
427 }
428#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
429 if ( uShw.pPD->a[iShw].n.u1Present
430 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
431 {
432 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
433# ifdef IN_RC /* TLB load - we're pushing things a bit... */
434 ASMProbeReadByte(pvAddress);
435# endif
436 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
437 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
438 }
439#endif
440 break;
441 }
442
443 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
444 {
445 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
446 const unsigned iShw = off / sizeof(X86PDEPAE);
447 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
448#ifndef IN_RING0
449 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
450 {
451 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
452 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
453 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
454 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
455 break;
456 }
457#endif /* !IN_RING0 */
458 /*
459 * Causes trouble when the guest uses a PDE to refer to the whole page table level
460 * structure. (Invalidate here; faults later on when it tries to change the page
461 * table entries -> recheck; probably only applies to the RC case.)
462 */
463# ifndef IN_RING0
464 else
465# endif /* !IN_RING0 */
466 {
467 if (uShw.pPDPae->a[iShw].n.u1Present)
468 {
469 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
470 pgmPoolFree(pVM,
471 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
472 pPage->idx,
473 iShw);
474 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
475 }
476 }
477 /* paranoia / a bit assumptive. */
478 if ( (off & 7)
479 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
480 {
481 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
482 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
483
484#ifndef IN_RING0
485 if ( iShw2 != iShw
486 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
487 {
488 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
489 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
490 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
491 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
492 break;
493 }
494#endif /* !IN_RING0 */
495# ifndef IN_RING0
496 else
497# endif /* !IN_RING0 */
498 if (uShw.pPDPae->a[iShw2].n.u1Present)
499 {
500 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
501 pgmPoolFree(pVM,
502 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
503 pPage->idx,
504 iShw2);
505 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
506 }
507 }
508 break;
509 }
510
511 case PGMPOOLKIND_PAE_PDPT:
512 {
513 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
514 /*
515 * Hopefully this doesn't happen very often:
516 * - touching unused parts of the page
517 * - messing with the bits of pd pointers without changing the physical address
518 */
519 /* PDPT roots are not page aligned; 32 byte only! */
520 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
521
522 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
523 const unsigned iShw = offPdpt / sizeof(X86PDPE);
524 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
525 {
526# ifndef IN_RING0
527 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
528 {
529 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
530 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
531 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
532 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
533 break;
534 }
535# endif /* !IN_RING0 */
536# ifndef IN_RING0
537 else
538# endif /* !IN_RING0 */
539 if (uShw.pPDPT->a[iShw].n.u1Present)
540 {
541 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
542 pgmPoolFree(pVM,
543 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
544 pPage->idx,
545 iShw);
546 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
547 }
548
549 /* paranoia / a bit assumptive. */
550 if ( (offPdpt & 7)
551 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
552 {
553 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
554 if ( iShw2 != iShw
555 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
556 {
557# ifndef IN_RING0
558 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
559 {
560 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
561 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
562 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
563 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
564 break;
565 }
566# endif /* !IN_RING0 */
567# ifndef IN_RING0
568 else
569# endif /* !IN_RING0 */
570 if (uShw.pPDPT->a[iShw2].n.u1Present)
571 {
572 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
573 pgmPoolFree(pVM,
574 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
575 pPage->idx,
576 iShw2);
577 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
578 }
579 }
580 }
581 }
582 break;
583 }
584
585#ifndef IN_RC
586 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
587 {
588 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
589 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
590 const unsigned iShw = off / sizeof(X86PDEPAE);
591 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
592 if (uShw.pPDPae->a[iShw].n.u1Present)
593 {
594 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
595 pgmPoolFree(pVM,
596 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
597 pPage->idx,
598 iShw);
599 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
600 }
601 /* paranoia / a bit assumptive. */
602 if ( (off & 7)
603 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
604 {
605 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
606 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
607
608 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
609 if (uShw.pPDPae->a[iShw2].n.u1Present)
610 {
611 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
612 pgmPoolFree(pVM,
613 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
614 pPage->idx,
615 iShw2);
616 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
617 }
618 }
619 break;
620 }
621
622 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
623 {
624 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
625 /*
626 * Hopefully this doesn't happen very often:
627 * - messing with the bits of pd pointers without changing the physical address
628 */
629 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
630 const unsigned iShw = off / sizeof(X86PDPE);
631 if (uShw.pPDPT->a[iShw].n.u1Present)
632 {
633 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
634 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
635 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
636 }
637 /* paranoia / a bit assumptive. */
638 if ( (off & 7)
639 && (off & 7) + cbWrite > sizeof(X86PDPE))
640 {
641 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
642 if (uShw.pPDPT->a[iShw2].n.u1Present)
643 {
644 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
645 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
646 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
647 }
648 }
649 break;
650 }
651
652 case PGMPOOLKIND_64BIT_PML4:
653 {
654 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
655 /*
656 * Hopefully this doesn't happen very often:
657 * - messing with the bits of pd pointers without changing the physical address
658 */
659 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
660 const unsigned iShw = off / sizeof(X86PDPE);
661 if (uShw.pPML4->a[iShw].n.u1Present)
662 {
663 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
664 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
665 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
666 }
667 /* paranoia / a bit assumptive. */
668 if ( (off & 7)
669 && (off & 7) + cbWrite > sizeof(X86PDPE))
670 {
671 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
672 if (uShw.pPML4->a[iShw2].n.u1Present)
673 {
674 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
675 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
676 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
677 }
678 }
679 break;
680 }
681#endif /* !IN_RC */
682
683 default:
684 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
685 }
686 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
687
688 /* next */
689 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
690 return;
691 pPage = &pPool->aPages[pPage->iMonitoredNext];
692 }
693}
694
695# ifndef IN_RING3
696/**
697 * Checks if an access could be a fork operation in progress.
698 *
699 * Meaning that the guest is setting up the parent process for Copy-On-Write.
700 *
701 * @returns true if it's likely that we're forking, otherwise false.
702 * @param pPool The pool.
703 * @param pDis The disassembled instruction.
704 * @param offFault The access offset.
705 */
706DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
707{
708 /*
709 * i386 linux is using btr to clear X86_PTE_RW.
710 * The functions involved are (2.6.16 source inspection):
711 * clear_bit
712 * ptep_set_wrprotect
713 * copy_one_pte
714 * copy_pte_range
715 * copy_pmd_range
716 * copy_pud_range
717 * copy_page_range
718 * dup_mmap
719 * dup_mm
720 * copy_mm
721 * copy_process
722 * do_fork
723 */
724 if ( pDis->pCurInstr->opcode == OP_BTR
725 && !(offFault & 4)
726 /** @todo Validate that the bit index is X86_PTE_RW. */
727 )
728 {
729 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
730 return true;
731 }
732 return false;
733}
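/*
 * Illustrative sketch of the guest code this heuristic targets (i386 Linux
 * clear_bit() on _PAGE_RW; the register used is hypothetical):
 *
 *     lock btr dword [edx], 1    ; clear the R/W bit (bit 1) of the PTE
 *
 * The store hits the low dword of the entry, so !(offFault & 4) holds; the
 * bit index itself is not verified here (see the @todo above).
 */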
734
735
736/**
737 * Determine whether the page is likely to have been reused.
738 *
739 * @returns true if we consider the page as being reused for a different purpose.
740 * @returns false if we consider it to still be a paging page.
741 * @param pVM VM Handle.
742 * @param pVCpu VMCPU Handle.
743 * @param pRegFrame Trap register frame.
744 * @param pDis The disassembly info for the faulting instruction.
745 * @param pvFault The fault address.
746 *
747 * @remark The REP prefix check is left to the caller because of STOSD/W.
748 */
749DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
750{
751#ifndef IN_RC
752 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
753 if ( HWACCMHasPendingIrq(pVM)
754 && (pRegFrame->rsp - pvFault) < 32)
755 {
756 /* Fault caused by stack writes while trying to inject an interrupt event. */
757 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
758 return true;
759 }
760#else
761 NOREF(pVM); NOREF(pvFault);
762#endif
763
764 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
765
766 /* Non-supervisor mode write means it's used for something else. */
767 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
768 return true;
769
770 switch (pDis->pCurInstr->opcode)
771 {
772 /* call implies the actual push of the return address faulted */
773 case OP_CALL:
774 Log4(("pgmPoolMonitorIsReused: CALL\n"));
775 return true;
776 case OP_PUSH:
777 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
778 return true;
779 case OP_PUSHF:
780 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
781 return true;
782 case OP_PUSHA:
783 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
784 return true;
785 case OP_FXSAVE:
786 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
787 return true;
788 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
789 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
790 return true;
791 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
792 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
793 return true;
794 case OP_MOVSWD:
795 case OP_STOSWD:
796 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
797 && pRegFrame->rcx >= 0x40
798 )
799 {
800 Assert(pDis->mode == CPUMODE_64BIT);
801
802 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
803 return true;
804 }
805 return false;
806 }
807 if ( ( (pDis->param1.flags & USE_REG_GEN32)
808 || (pDis->param1.flags & USE_REG_GEN64))
809 && (pDis->param1.base.reg_gen == USE_REG_ESP))
810 {
811 Log4(("pgmPoolMonitorIsReused: ESP\n"));
812 return true;
813 }
814
815 return false;
816}
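/*
 * Note on the stack heuristic at the top of pgmPoolMonitorIsReused: when the
 * monitored page has been recycled as a guest (kernel) stack, the pending
 * interrupt injection is about to push a frame just below rsp, so a write
 * fault within 32 bytes of rsp while an IRQ is pending is taken as a strong
 * hint that the page no longer holds page tables.
 */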
817
818/**
819 * Flushes the page being accessed.
820 *
821 * @returns VBox status code suitable for scheduling.
822 * @param pVM The VM handle.
823 * @param pVCpu The VMCPU handle.
824 * @param pPool The pool.
825 * @param pPage The pool page (head).
826 * @param pDis The disassembly of the write instruction.
827 * @param pRegFrame The trap register frame.
828 * @param GCPhysFault The fault address as guest physical address.
829 * @param pvFault The fault address.
830 * @todo VBOXSTRICTRC
831 */
832static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
833 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
834{
835 /*
836 * First, do the flushing.
837 */
838 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
839
840 /*
841 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
842 * Must do this in raw mode (!); XP boot will fail otherwise.
843 */
844 uint32_t cbWritten;
845 VBOXSTRICTRC rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL, &cbWritten);
846 if (RT_SUCCESS(rc2))
847 {
848 pRegFrame->rip += pDis->opsize;
849 AssertMsg(rc2 == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
850 }
851 else if (rc2 == VERR_EM_INTERPRETER)
852 {
853#ifdef IN_RC
854 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
855 {
856 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
857 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
858 rc = VINF_SUCCESS;
859 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
860 }
861 else
862#endif
863 {
864 rc = VINF_EM_RAW_EMULATE_INSTR;
865 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
866 }
867 }
868 else
869 rc = VBOXSTRICTRC_VAL(rc2);
870
871 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
872 return rc;
873}
874
875/**
876 * Handles the STOSD write accesses.
877 *
878 * @returns VBox status code suitable for scheduling.
879 * @param pVM The VM handle.
880 * @param pPool The pool.
881 * @param pPage The pool page (head).
882 * @param pDis The disassembly of the write instruction.
883 * @param pRegFrame The trap register frame.
884 * @param GCPhysFault The fault address as guest physical address.
885 * @param pvFault The fault address.
886 */
887DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
888 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
889{
890 unsigned uIncrement = pDis->param1.size;
891
892 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
893 Assert(pRegFrame->rcx <= 0x20);
894
895#ifdef VBOX_STRICT
896 if (pDis->opmode == CPUMODE_32BIT)
897 Assert(uIncrement == 4);
898 else
899 Assert(uIncrement == 8);
900#endif
901
902 Log3(("pgmPoolAccessHandlerSTOSD\n"));
903
904 /*
905 * Increment the modification counter and insert it into the list
906 * of modified pages the first time.
907 */
908 if (!pPage->cModifications++)
909 pgmPoolMonitorModifiedInsert(pPool, pPage);
910
911 /*
912 * Execute REP STOSD.
913 *
914 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
915 * write situation, meaning that it's safe to write here.
916 */
917 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
918 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
919 while (pRegFrame->rcx)
920 {
921#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
922 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
923 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
924 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
925#else
926 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
927#endif
928#ifdef IN_RC
929 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
930#else
931 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
932#endif
933 pu32 += uIncrement;
934 GCPhysFault += uIncrement;
935 pRegFrame->rdi += uIncrement;
936 pRegFrame->rcx--;
937 }
938 pRegFrame->rip += pDis->opsize;
939
940 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
941 return VINF_SUCCESS;
942}
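/*
 * Illustrative guest sequence of the kind handled above (hypothetical 32-bit
 * example; the constraints mirror the fValidStosd checks in pgmPoolAccessHandler):
 *
 *     xor  eax, eax       ; value 0 (0 and 0x80 are the accepted patterns)
 *     mov  ecx, 0x20      ; at most 0x20 repetitions
 *     mov  edi, ptes      ; dword aligned, run stays within one page
 *     cld                 ; direction flag must be clear
 *     rep  stosd          ; each iteration advances edi/GCPhysFault by 4
 */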
943
944
945/**
946 * Handles the simple write accesses.
947 *
948 * @returns VBox status code suitable for scheduling.
949 * @param pVM The VM handle.
950 * @param pVCpu The VMCPU handle.
951 * @param pPool The pool.
952 * @param pPage The pool page (head).
953 * @param pDis The disassembly of the write instruction.
954 * @param pRegFrame The trap register frame.
955 * @param GCPhysFault The fault address as guest physical address.
956 * @param pvFault The fault address.
957 * @param pfReused Reused state (out)
958 */
959DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
960 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
961{
962 Log3(("pgmPoolAccessHandlerSimple\n"));
963 /*
964 * Increment the modification counter and insert it into the list
965 * of modified pages the first time.
966 */
967 if (!pPage->cModifications++)
968 pgmPoolMonitorModifiedInsert(pPool, pPage);
969
970 /*
971 * Clear all the pages. ASSUMES that pvFault is readable.
972 */
973#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
974 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
975 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
976 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
977#else
978 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
979#endif
980
981 /*
982 * Interpret the instruction.
983 */
984 uint32_t cb;
985 VBOXSTRICTRC rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL, &cb);
986 if (RT_SUCCESS(rc))
987 {
988 pRegFrame->rip += pDis->opsize;
989 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
990 }
991 else if (rc == VERR_EM_INTERPRETER)
992 {
993 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
994 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
995 rc = VINF_EM_RAW_EMULATE_INSTR;
996 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
997 }
998
999#if 0 /* experimental code */
1000 if (rc == VINF_SUCCESS)
1001 {
1002 switch (pPage->enmKind)
1003 {
1004 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1005 {
1006 X86PTEPAE GstPte;
1007 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1008 AssertRC(rc);
1009
1010 /* Check the new value written by the guest. If present and with a bogus physical address, then
1011 * it's fairly safe to assume the guest is reusing the PT.
1012 */
1013 if (GstPte.n.u1Present)
1014 {
1015 RTHCPHYS HCPhys = -1;
1016 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1017 if (rc != VINF_SUCCESS)
1018 {
1019 *pfReused = true;
1020 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1021 }
1022 }
1023 break;
1024 }
1025 }
1026 }
1027#endif
1028
1029 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", VBOXSTRICTRC_VAL(rc), cb));
1030 return VBOXSTRICTRC_VAL(rc);
1031}
1032
1033/**
1034 * \#PF Handler callback for PT write accesses.
1035 *
1036 * @returns VBox status code (appropriate for GC return).
1037 * @param pVM VM Handle.
1038 * @param uErrorCode CPU Error code.
1039 * @param pRegFrame Trap register frame.
1040 * NULL on DMA and other non CPU access.
1041 * @param pvFault The fault address (cr2).
1042 * @param GCPhysFault The GC physical address corresponding to pvFault.
1043 * @param pvUser User argument.
1044 */
1045DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1046{
1047 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1048 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1049 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1050 PVMCPU pVCpu = VMMGetCpu(pVM);
1051 unsigned cMaxModifications;
1052 bool fForcedFlush = false;
1053
1054 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1055
1056 pgmLock(pVM);
1057 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1058 {
1059 /* Pool page changed while we were waiting for the lock; ignore. */
1060 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1061 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1062 pgmUnlock(pVM);
1063 return VINF_SUCCESS;
1064 }
1065#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1066 if (pPage->fDirty)
1067 {
1068 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1069 pgmUnlock(pVM);
1070 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1071 }
1072#endif
1073
1074#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1075 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1076 {
1077 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1078 void *pvGst;
1079 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1080 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1081 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1082 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1083 }
1084#endif
1085
1086 /*
1087 * Disassemble the faulting instruction.
1088 */
1089 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1090 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1091 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1092 {
1093 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1094 pgmUnlock(pVM);
1095 return rc;
1096 }
1097
1098 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1099
1100 /*
1101 * We should ALWAYS have the list head as user parameter. This
1102 * is because we use that page to record the changes.
1103 */
1104 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1105
1106#ifdef IN_RING0
1107 /* Maximum nr of modifications depends on the page type. */
1108 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1109 cMaxModifications = 4;
1110 else
1111 cMaxModifications = 24;
1112#else
1113 cMaxModifications = 48;
1114#endif
1115
1116 /*
1117 * Incremental page table updates should weigh more than random ones.
1118 * (Only applies when started from offset 0)
1119 */
1120 pVCpu->pgm.s.cPoolAccessHandler++;
1121 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1122 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1123 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1124 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1125 {
1126 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1127 Assert(pPage->cModifications < 32000);
1128 pPage->cModifications = pPage->cModifications * 2;
1129 pPage->pvLastAccessHandlerFault = pvFault;
1130 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1131 if (pPage->cModifications >= cMaxModifications)
1132 {
1133 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1134 fForcedFlush = true;
1135 }
1136 }
1137
1138 if (pPage->cModifications >= cMaxModifications)
1139 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1140
1141 /*
1142 * Check if it's worth dealing with.
1143 */
1144 bool fReused = false;
1145 bool fNotReusedNotForking = false;
1146 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1147 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1148 )
1149 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1150 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1151 {
1152 /*
1153 * Simple instructions, no REP prefix.
1154 */
1155 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1156 {
1157 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1158 if (fReused)
1159 goto flushPage;
1160
1161 /* A mov instruction to change the first page table entry will be remembered so we can detect
1162 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1163 */
1164 if ( rc == VINF_SUCCESS
1165 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1166 && pDis->pCurInstr->opcode == OP_MOV
1167 && (pvFault & PAGE_OFFSET_MASK) == 0)
1168 {
1169 pPage->pvLastAccessHandlerFault = pvFault;
1170 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1171 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1172 /* Make sure we don't kick out a page too quickly. */
1173 if (pPage->cModifications > 8)
1174 pPage->cModifications = 2;
1175 }
1176 else
1177 if (pPage->pvLastAccessHandlerFault == pvFault)
1178 {
1179 /* ignore the 2nd write to this page table entry. */
1180 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1181 }
1182 else
1183 {
1184 pPage->pvLastAccessHandlerFault = 0;
1185 pPage->pvLastAccessHandlerRip = 0;
1186 }
1187
1188 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1189 pgmUnlock(pVM);
1190 return rc;
1191 }
1192
1193 /*
1194 * Windows is frequently doing small memset() operations (netio test 4k+).
1195 * We have to deal with these or we'll kill the cache and performance.
1196 */
1197 if ( pDis->pCurInstr->opcode == OP_STOSWD
1198 && !pRegFrame->eflags.Bits.u1DF
1199 && pDis->opmode == pDis->mode
1200 && pDis->addrmode == pDis->mode)
1201 {
1202 bool fValidStosd = false;
1203
1204 if ( pDis->mode == CPUMODE_32BIT
1205 && pDis->prefix == PREFIX_REP
1206 && pRegFrame->ecx <= 0x20
1207 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1208 && !((uintptr_t)pvFault & 3)
1209 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1210 )
1211 {
1212 fValidStosd = true;
1213 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1214 }
1215 else
1216 if ( pDis->mode == CPUMODE_64BIT
1217 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1218 && pRegFrame->rcx <= 0x20
1219 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1220 && !((uintptr_t)pvFault & 7)
1221 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1222 )
1223 {
1224 fValidStosd = true;
1225 }
1226
1227 if (fValidStosd)
1228 {
1229 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1230 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1231 pgmUnlock(pVM);
1232 return rc;
1233 }
1234 }
1235
1236 /* REP prefix, don't bother. */
1237 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1238 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1239 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1240 fNotReusedNotForking = true;
1241 }
1242
1243#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1244 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1245 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1246 */
1247 if ( pPage->cModifications >= cMaxModifications
1248 && !fForcedFlush
1249# if 1
1250 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1251# else /* test code */
1252 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1253# endif
1254 && ( fNotReusedNotForking
1255 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1256 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1257 )
1258 )
1259 {
1260 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1261 Assert(pPage->fDirty == false);
1262
1263 /* Flush any monitored duplicates as we will disable write protection. */
1264 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1265 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1266 {
1267 PPGMPOOLPAGE pPageHead = pPage;
1268
1269 /* Find the monitor head. */
1270 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1271 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1272
1273 while (pPageHead)
1274 {
1275 unsigned idxNext = pPageHead->iMonitoredNext;
1276
1277 if (pPageHead != pPage)
1278 {
1279 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1280 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1281 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1282 AssertRC(rc2);
1283 }
1284
1285 if (idxNext == NIL_PGMPOOL_IDX)
1286 break;
1287
1288 pPageHead = &pPool->aPages[idxNext];
1289 }
1290 }
1291
1292 /* The flushing above might fail for locked pages, so double check. */
1293 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1294 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1295 {
1296 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1297
1298 /* Temporarily allow write access to the page table again. */
1299 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1300 if (rc == VINF_SUCCESS)
1301 {
1302 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1303 AssertMsg(rc == VINF_SUCCESS
1304 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1305 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1306 || rc == VERR_PAGE_NOT_PRESENT,
1307 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1308
1309 pPage->pvDirtyFault = pvFault;
1310
1311 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1312 pgmUnlock(pVM);
1313 return rc;
1314 }
1315 }
1316 }
1317#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1318
1319 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1320flushPage:
1321 /*
1322 * Not worth it, so flush it.
1323 *
1324 * If we considered it to be reused, don't go back to ring-3
1325 * to emulate failed instructions since we usually cannot
1326 * interpret them. This may be a bit risky, in which case
1327 * the reuse detection must be fixed.
1328 */
1329 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1330 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1331 && fReused)
1332 {
1333 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1334 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1335 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1336 }
1337 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1338 pgmUnlock(pVM);
1339 return rc;
1340}
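/*
 * Rough decision flow of pgmPoolAccessHandler above (summary only):
 *   1. Below the modification limit and neither reused nor forking:
 *        a. no REP prefix            -> pgmPoolAccessHandlerSimple
 *        b. valid REP STOSD pattern  -> pgmPoolAccessHandlerSTOSD
 *   2. Ring-0 with PGMPOOL_WITH_OPTIMIZED_DIRTY_PT and the limit exceeded on a
 *      PAE-on-PAE PT: mark the page dirty and temporarily re-enable write
 *      access instead of flushing.
 *   3. Everything else               -> pgmPoolAccessHandlerFlush.
 */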
1341
1342# endif /* !IN_RING3 */
1343
1344# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1345
1346# ifdef VBOX_STRICT
1347/**
1348 * Check references to guest physical memory in a PAE / PAE page table.
1349 *
1350 * @param pPool The pool.
1351 * @param pPage The page.
1352 * @param pShwPT The shadow page table (mapping of the page).
1353 * @param pGstPT The guest page table.
1354 */
1355static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1356{
1357 unsigned cErrors = 0;
1358 int LastRc = -1; /* initialized to shut up gcc */
1359 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1360 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1361 PVM pVM = pPool->CTX_SUFF(pVM);
1362
1363#ifdef VBOX_STRICT
1364 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1365 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1366#endif
1367 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1368 {
1369 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1370 {
1371 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1372 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1373 if ( rc != VINF_SUCCESS
1374 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1375 {
1376 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1377 LastPTE = i;
1378 LastRc = rc;
1379 LastHCPhys = HCPhys;
1380 cErrors++;
1381
1382 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1383 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1384 AssertRC(rc);
1385
1386 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1387 {
1388 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1389
1390 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1391 {
1392 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1393
1394 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1395 {
1396 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1397 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1398 {
1399 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1400 }
1401 }
1402
1403 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1404 }
1405 }
1406 }
1407 }
1408 }
1409 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1410}
1411
1412/**
1413 * Check references to guest physical memory in a PAE / 32-bit page table.
1414 *
1415 * @param pPool The pool.
1416 * @param pPage The page.
1417 * @param pShwPT The shadow page table (mapping of the page).
1418 * @param pGstPT The guest page table.
1419 */
1420static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1421{
1422 unsigned cErrors = 0;
1423 int LastRc = -1; /* initialized to shut up gcc */
1424 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1425 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1426 PVM pVM = pPool->CTX_SUFF(pVM);
1427
1428#ifdef VBOX_STRICT
1429 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1430 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1431#endif
1432 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1433 {
1434 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1435 {
1436 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1437 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1438 if ( rc != VINF_SUCCESS
1439 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1440 {
1441 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1442 LastPTE = i;
1443 LastRc = rc;
1444 LastHCPhys = HCPhys;
1445 cErrors++;
1446
1447 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1448 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1449 AssertRC(rc);
1450
1451 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1452 {
1453 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1454
1455 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1456 {
1457 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1458
1459 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1460 {
1461 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1462 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1463 {
1464 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1465 }
1466 }
1467
1468 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1469 }
1470 }
1471 }
1472 }
1473 }
1474 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1475}
1476
1477# endif /* VBOX_STRICT */
1478
1479/**
1480 * Clear references to guest physical memory in a PAE / PAE page table.
1481 *
1482 * @returns nr of changed PTEs
1483 * @param pPool The pool.
1484 * @param pPage The page.
1485 * @param pShwPT The shadow page table (mapping of the page).
1486 * @param pGstPT The guest page table.
1487 * @param pOldGstPT The old cached guest page table.
1488 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1489 * @param pfFlush Flush reused page table (out)
1490 */
1491DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1492 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1493{
1494 unsigned cChanged = 0;
1495
1496#ifdef VBOX_STRICT
1497 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1498 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1499#endif
1500 *pfFlush = false;
1501
1502 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1503 {
1504 /* Check the new value written by the guest. If present and with a bogus physical address, then
1505 * it's fairly safe to assume the guest is reusing the PT.
1506 */
1507 if ( fAllowRemoval
1508 && pGstPT->a[i].n.u1Present)
1509 {
1510 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1511 {
1512 *pfFlush = true;
1513 return ++cChanged;
1514 }
1515 }
1516 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1517 {
1518 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1519 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1520 {
1521#ifdef VBOX_STRICT
1522 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1523 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1524 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1525#endif
1526 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1527 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1528 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1529 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1530
1531 if ( uHostAttr == uGuestAttr
1532 && fHostRW <= fGuestRW)
1533 continue;
1534 }
1535 cChanged++;
1536 /* Something was changed, so flush it. */
1537 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1538 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1539 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1540 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1541 }
1542 }
1543 return cChanged;
1544}
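/*
 * Note on the attribute comparison above: the shadow PTE may legitimately be
 * more restrictive than the guest PTE with respect to write access (e.g. for
 * dirty-page tracking or write-monitored physical pages), hence
 * fHostRW <= fGuestRW is tolerated while the other compared attribute bits
 * must match exactly.  Hypothetical example: guest PTE = P|RW|US|A|D and
 * shadow PTE = P|US|A|D with the same physical address -> no flush needed.
 */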
1545
1546/**
1547 * Clear references to guest physical memory in a PAE / 32-bit page table.
1548 *
1549 * @returns nr of changed PTEs
1550 * @param pPool The pool.
1551 * @param pPage The page.
1552 * @param pShwPT The shadow page table (mapping of the page).
1553 * @param pGstPT The guest page table.
1554 * @param pOldGstPT The old cached guest page table.
1555 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1556 * @param pfFlush Flush reused page table (out)
1557 */
1558DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1559 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1560{
1561 unsigned cChanged = 0;
1562
1563#ifdef VBOX_STRICT
1564 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1565 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1566#endif
1567 *pfFlush = false;
1568
1569 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1570 {
1571 /* Check the new value written by the guest. If present and with a bogus physical address, then
1572 * it's fairly safe to assume the guest is reusing the PT.
1573 */
1574 if ( fAllowRemoval
1575 && pGstPT->a[i].n.u1Present)
1576 {
1577 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1578 {
1579 *pfFlush = true;
1580 return ++cChanged;
1581 }
1582 }
1583 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1584 {
1585 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1586 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1587 {
1588#ifdef VBOX_STRICT
1589 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1590 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1591 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1592#endif
1593 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1594 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1595 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1596 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1597
1598 if ( uHostAttr == uGuestAttr
1599 && fHostRW <= fGuestRW)
1600 continue;
1601 }
1602 cChanged++;
1603 /* Something was changed, so flush it. */
1604            Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1605 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1606 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1607 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1608 }
1609 }
1610 return cChanged;
1611}
1612
1613/**
1614 * Flush a dirty page
1615 *
1616 * @param pVM VM Handle.
1617 * @param pPool The pool.
1618 * @param idxSlot Dirty array slot index
1619 * @param fAllowRemoval Allow a reused page table to be removed
1620 */
1621static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1622{
1623 PPGMPOOLPAGE pPage;
1624 unsigned idxPage;
1625
1626 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1627 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1628 return;
1629
1630 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1631 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1632 pPage = &pPool->aPages[idxPage];
1633 Assert(pPage->idx == idxPage);
1634 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1635
1636 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1637 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1638
1639#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1640 PVMCPU pVCpu = VMMGetCpu(pVM);
1641 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1642#endif
1643
1644 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1645 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1646 Assert(rc == VINF_SUCCESS);
1647 pPage->fDirty = false;
1648
1649#ifdef VBOX_STRICT
1650 uint64_t fFlags = 0;
1651 RTHCPHYS HCPhys;
1652 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1653 AssertMsg( ( rc == VINF_SUCCESS
1654 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1655 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1656 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1657 || rc == VERR_PAGE_NOT_PRESENT,
1658 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1659#endif
1660
1661 /* Flush those PTEs that have changed. */
1662 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1663 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1664 void *pvGst;
1665 rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1666 bool fFlush;
1667 unsigned cChanges;
1668
1669 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1670 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1671 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1672 else
1673 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1674 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1675
1676 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1677 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1678 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1679 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1680
1681 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1682 Assert(pPage->cModifications);
1683 if (cChanges < 4)
1684 pPage->cModifications = 1; /* must use > 0 here */
1685 else
1686 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1687
1688 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
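    /* If the dirty array was completely full, the slot we just flushed becomes the next free slot. */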
1689 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1690 pPool->idxFreeDirtyPage = idxSlot;
1691
1692 pPool->cDirtyPages--;
1693 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1694 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1695 if (fFlush)
1696 {
1697 Assert(fAllowRemoval);
1698 Log(("Flush reused page table!\n"));
1699 pgmPoolFlushPage(pPool, pPage);
1700 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1701 }
1702 else
1703 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1704
1705#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1706 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1707#endif
1708}
1709
1710# ifndef IN_RING3
1711/**
1712 * Add a new dirty page
1713 *
1714 * @param pVM VM Handle.
1715 * @param pPool The pool.
1716 * @param pPage The page.
1717 */
1718void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1719{
1720 unsigned idxFree;
1721
1722 Assert(PGMIsLocked(pVM));
1723 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1724 Assert(!pPage->fDirty);
1725
1726 idxFree = pPool->idxFreeDirtyPage;
1727 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1728 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1729
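    /* No free slot left: flush the entry at the current free-slot index to make room, allowing removal of a reused page table. */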
1730 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1731 {
1732 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1733 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1734 }
1735 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1736 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1737
1738 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1739
1740 /*
1741 * Make a copy of the guest page table as we require valid GCPhys addresses
1742 * when removing references to physical pages.
1743 * (The HCPhys linear lookup is *extremely* expensive!)
1744 */
1745 void *pvGst;
1746 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1747 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, PAGE_SIZE);
1748#ifdef VBOX_STRICT
1749 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1750 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1751 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1752 else
1753 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1754 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1755#endif
1756 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1757
1758 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1759 pPage->fDirty = true;
1760 pPage->idxDirty = idxFree;
1761 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1762 pPool->cDirtyPages++;
1763
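    /* Advance the free-slot hint; if that slot is occupied and the array is not full, scan forward (with wrap-around) for an empty one. */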
1764 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1765 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1766 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1767 {
1768 unsigned i;
1769 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1770 {
1771 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1772 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1773 {
1774 pPool->idxFreeDirtyPage = idxFree;
1775 break;
1776 }
1777 }
1778 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1779 }
1780
1781 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1782 return;
1783}
1784# endif /* !IN_RING3 */
1785
1786/**
1787 * Check if the specified page is dirty (not write monitored)
1788 *
1789 * @return dirty or not
1790 * @param pVM VM Handle.
1791 * @param GCPhys Guest physical address
1792 */
1793bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1794{
1795 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1796 Assert(PGMIsLocked(pVM));
1797 if (!pPool->cDirtyPages)
1798 return false;
1799
1800 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1801
1802 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1803 {
1804 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1805 {
1806 PPGMPOOLPAGE pPage;
1807 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1808
1809 pPage = &pPool->aPages[idxPage];
1810 if (pPage->GCPhys == GCPhys)
1811 return true;
1812 }
1813 }
1814 return false;
1815}
1816
1817/**
1818 * Reset all dirty pages by reinstating page monitoring.
1819 *
1820 * @param pVM VM Handle.
1821 */
1822void pgmPoolResetDirtyPages(PVM pVM)
1823{
1824 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1825 Assert(PGMIsLocked(pVM));
1826 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1827
1828 if (!pPool->cDirtyPages)
1829 return;
1830
1831 Log(("pgmPoolResetDirtyPages\n"));
1832 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1833 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1834
1835 pPool->idxFreeDirtyPage = 0;
1836 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1837 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1838 {
1839 unsigned i;
1840 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1841 {
1842 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1843 {
1844 pPool->idxFreeDirtyPage = i;
1845 break;
1846 }
1847 }
1848 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1849 }
1850
1851 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1852 return;
1853}
1854
1855/**
1856 * Invalidate the PT entry for the specified page
1857 *
1858 * @param pVM VM Handle.
1859 * @param GCPtrPage Guest page to invalidate
1860 */
1861void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1862{
1863 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1864 Assert(PGMIsLocked(pVM));
1865 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1866
1867 if (!pPool->cDirtyPages)
1868 return;
1869
1870 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
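    /* Note: the loop body below is empty, so this function currently only logs the request. */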
1871 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1872 {
1873 }
1874}
1875
1876/**
1877 * Flush the dirty page (if any) for the page table at the given physical address, reinstating write monitoring.
1878 *
1879 * @param pVM VM Handle.
1880 * @param GCPhysPT Physical address of the page table
1881 */
1882void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1883{
1884 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1885 Assert(PGMIsLocked(pVM));
1886 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1887 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1888
1889 if (!pPool->cDirtyPages)
1890 return;
1891
1892 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1893
1894 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1895 {
1896 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1897 {
1898 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1899
1900 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1901 if (pPage->GCPhys == GCPhysPT)
1902 {
1903 idxDirtyPage = i;
1904 break;
1905 }
1906 }
1907 }
1908
1909 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1910 {
1911 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1912 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1913 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1914 {
1915 unsigned i;
1916 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1917 {
1918 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1919 {
1920 pPool->idxFreeDirtyPage = i;
1921 break;
1922 }
1923 }
1924 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1925 }
1926 }
1927}
1928
1929# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1930
1931/**
1932 * Inserts a page into the GCPhys hash table.
1933 *
1934 * @param pPool The pool.
1935 * @param pPage The page.
1936 */
1937DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1938{
1939 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1940 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1941 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1942 pPage->iNext = pPool->aiHash[iHash];
1943 pPool->aiHash[iHash] = pPage->idx;
1944}
1945
1946
1947/**
1948 * Removes a page from the GCPhys hash table.
1949 *
1950 * @param pPool The pool.
1951 * @param pPage The page.
1952 */
1953DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1954{
1955 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1956 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1957 if (pPool->aiHash[iHash] == pPage->idx)
1958 pPool->aiHash[iHash] = pPage->iNext;
1959 else
1960 {
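        /* Not the chain head: walk the hash chain to find the predecessor and unlink the page. */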
1961 uint16_t iPrev = pPool->aiHash[iHash];
1962 for (;;)
1963 {
1964 const int16_t i = pPool->aPages[iPrev].iNext;
1965 if (i == pPage->idx)
1966 {
1967 pPool->aPages[iPrev].iNext = pPage->iNext;
1968 break;
1969 }
1970 if (i == NIL_PGMPOOL_IDX)
1971 {
1972 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1973 break;
1974 }
1975 iPrev = i;
1976 }
1977 }
1978 pPage->iNext = NIL_PGMPOOL_IDX;
1979}
1980
1981
1982/**
1983 * Frees up one cache page.
1984 *
1985 * @returns VBox status code.
1986 * @retval VINF_SUCCESS on success.
1987 * @param pPool The pool.
1988 * @param iUser The user index.
1989 */
1990static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1991{
1992#ifndef IN_RC
1993 const PVM pVM = pPool->CTX_SUFF(pVM);
1994#endif
1995    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1996 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1997
1998 /*
1999 * Select one page from the tail of the age list.
2000 */
2001 PPGMPOOLPAGE pPage;
2002 for (unsigned iLoop = 0; ; iLoop++)
2003 {
2004 uint16_t iToFree = pPool->iAgeTail;
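        /* Don't evict the caller's own user-table page; pick the next oldest page in the age (LRU) list instead. */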
2005 if (iToFree == iUser)
2006 iToFree = pPool->aPages[iToFree].iAgePrev;
2007/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2008 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2009 {
2010 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2011 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2012 {
2013 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2014 continue;
2015 iToFree = i;
2016 break;
2017 }
2018 }
2019*/
2020 Assert(iToFree != iUser);
2021 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2022 pPage = &pPool->aPages[iToFree];
2023
2024 /*
2025 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2026 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2027 */
2028 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
2029 break;
2030 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2031 pgmPoolCacheUsed(pPool, pPage);
2032 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
2033 }
2034
2035 /*
2036 * Found a usable page, flush it and return.
2037 */
2038 int rc = pgmPoolFlushPage(pPool, pPage);
2039 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2040 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2041 if (rc == VINF_SUCCESS)
2042 PGM_INVL_ALL_VCPU_TLBS(pVM);
2043 return rc;
2044}
2045
2046
2047/**
2048 * Checks if a kind mismatch is really a page being reused
2049 * or if it's just normal remappings.
2050 *
2051 * @returns true if reused and the cached page (enmKind1) should be flushed
2052 * @returns false if not reused.
2053 * @param enmKind1 The kind of the cached page.
2054 * @param enmKind2 The kind of the requested page.
2055 */
2056static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2057{
2058 switch (enmKind1)
2059 {
2060 /*
2061 * Never reuse them. There is no remapping in non-paging mode.
2062 */
2063 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2064 case PGMPOOLKIND_32BIT_PD_PHYS:
2065 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2066 case PGMPOOLKIND_PAE_PD_PHYS:
2067 case PGMPOOLKIND_PAE_PDPT_PHYS:
2068 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2069 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2070 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2071 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2072 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2073 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2074 return false;
2075
2076 /*
2077 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2078 */
2079 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2080 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2081 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2082 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2083 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2084 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2085 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2086 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2087 case PGMPOOLKIND_32BIT_PD:
2088 case PGMPOOLKIND_PAE_PDPT:
2089 switch (enmKind2)
2090 {
2091 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2092 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2093 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2094 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2095 case PGMPOOLKIND_64BIT_PML4:
2096 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2097 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2098 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2099 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2100 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2101 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2102 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2103 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2104 return true;
2105 default:
2106 return false;
2107 }
2108
2109 /*
2110 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2111 */
2112 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2113 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2114 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2115 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2116 case PGMPOOLKIND_64BIT_PML4:
2117 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2118 switch (enmKind2)
2119 {
2120 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2121 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2122 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2123 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2124 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2125 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2126 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2127 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2128 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2129 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2130 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2131 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2132 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2133 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2134 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2135 return true;
2136 default:
2137 return false;
2138 }
2139
2140 /*
2141 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2142 */
2143 case PGMPOOLKIND_ROOT_NESTED:
2144 return false;
2145
2146 default:
2147 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2148 }
2149}
2150
2151
2152/**
2153 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2154 *
2155 * @returns VBox status code.
2156 * @retval VINF_PGM_CACHED_PAGE on success.
2157 * @retval VERR_FILE_NOT_FOUND if not found.
2158 * @param pPool The pool.
2159 * @param GCPhys The GC physical address of the page we're gonna shadow.
2160 * @param enmKind The kind of mapping.
2161 * @param enmAccess Access type for the mapping (only relevant for big pages)
2162 * @param iUser The shadow page pool index of the user table.
2163 * @param iUserTable The index into the user table (shadowed).
2164 * @param ppPage Where to store the pointer to the page.
2165 */
2166static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2167{
2168#ifndef IN_RC
2169 const PVM pVM = pPool->CTX_SUFF(pVM);
2170#endif
2171 /*
2172 * Look up the GCPhys in the hash.
2173 */
2174 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2175 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2176 if (i != NIL_PGMPOOL_IDX)
2177 {
2178 do
2179 {
2180 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2181 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2182 if (pPage->GCPhys == GCPhys)
2183 {
2184 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2185 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2186 {
2187 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2188 * doesn't flush it in case there are no more free use records.
2189 */
2190 pgmPoolCacheUsed(pPool, pPage);
2191
2192 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2193 if (RT_SUCCESS(rc))
2194 {
2195 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2196 *ppPage = pPage;
2197 if (pPage->cModifications)
2198 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2199 STAM_COUNTER_INC(&pPool->StatCacheHits);
2200 return VINF_PGM_CACHED_PAGE;
2201 }
2202 return rc;
2203 }
2204
2205 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2206 {
2207 /*
2208 * The kind is different. In some cases we should now flush the page
2209 * as it has been reused, but in most cases this is normal remapping
2210 * of PDs as PT or big pages using the GCPhys field in a slightly
2211 * different way than the other kinds.
2212 */
2213 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2214 {
2215 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2216 pgmPoolFlushPage(pPool, pPage);
2217 break;
2218 }
2219 }
2220 }
2221
2222 /* next */
2223 i = pPage->iNext;
2224 } while (i != NIL_PGMPOOL_IDX);
2225 }
2226
2227 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2228 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2229 return VERR_FILE_NOT_FOUND;
2230}
2231
2232
2233/**
2234 * Inserts a page into the cache.
2235 *
2236 * @param pPool The pool.
2237 * @param pPage The cached page.
2238 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2239 */
2240static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2241{
2242 /*
2243 * Insert into the GCPhys hash if the page is fit for that.
2244 */
2245 Assert(!pPage->fCached);
2246 if (fCanBeCached)
2247 {
2248 pPage->fCached = true;
2249 pgmPoolHashInsert(pPool, pPage);
2250 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2251 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2252 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2253 }
2254 else
2255 {
2256 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2257 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2258 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2259 }
2260
2261 /*
2262 * Insert at the head of the age list.
2263 */
2264 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2265 pPage->iAgeNext = pPool->iAgeHead;
2266 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2267 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2268 else
2269 pPool->iAgeTail = pPage->idx;
2270 pPool->iAgeHead = pPage->idx;
2271}
2272
2273
2274/**
2275 * Flushes a cached page.
2276 *
2277 * @param pPool The pool.
2278 * @param pPage The cached page.
2279 */
2280static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2281{
2282 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2283
2284 /*
2285 * Remove the page from the hash.
2286 */
2287 if (pPage->fCached)
2288 {
2289 pPage->fCached = false;
2290 pgmPoolHashRemove(pPool, pPage);
2291 }
2292 else
2293 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2294
2295 /*
2296 * Remove it from the age list.
2297 */
2298 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2299 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2300 else
2301 pPool->iAgeTail = pPage->iAgePrev;
2302 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2303 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2304 else
2305 pPool->iAgeHead = pPage->iAgeNext;
2306 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2307 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2308}
2309
2310
2311/**
2312 * Looks for pages sharing the monitor.
2313 *
2314 * @returns Pointer to the head page.
2315 * @returns NULL if not found.
2316 * @param pPool The Pool
2317 * @param pNewPage The page which is going to be monitored.
2318 */
2319static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2320{
2321 /*
2322 * Look up the GCPhys in the hash.
2323 */
2324 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2325 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2326 if (i == NIL_PGMPOOL_IDX)
2327 return NULL;
2328 do
2329 {
2330 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2331 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2332 && pPage != pNewPage)
2333 {
2334 switch (pPage->enmKind)
2335 {
2336 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2337 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2338 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2339 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2340 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2341 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2342 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2343 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2344 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2345 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2346 case PGMPOOLKIND_64BIT_PML4:
2347 case PGMPOOLKIND_32BIT_PD:
2348 case PGMPOOLKIND_PAE_PDPT:
2349 {
2350 /* find the head */
2351 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2352 {
2353 Assert(pPage->iMonitoredPrev != pPage->idx);
2354 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2355 }
2356 return pPage;
2357 }
2358
2359 /* ignore, no monitoring. */
2360 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2361 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2362 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2363 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2364 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2365 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2366 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2367 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2368 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2369 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2370 case PGMPOOLKIND_ROOT_NESTED:
2371 case PGMPOOLKIND_PAE_PD_PHYS:
2372 case PGMPOOLKIND_PAE_PDPT_PHYS:
2373 case PGMPOOLKIND_32BIT_PD_PHYS:
2374 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2375 break;
2376 default:
2377 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2378 }
2379 }
2380
2381 /* next */
2382 i = pPage->iNext;
2383 } while (i != NIL_PGMPOOL_IDX);
2384 return NULL;
2385}
2386
2387
2388/**
2389 * Enables write monitoring of a guest page.
2390 *
2391 * @returns VBox status code.
2392 * @retval VINF_SUCCESS on success.
2393 * @param pPool The pool.
2394 * @param pPage The cached page.
2395 */
2396static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2397{
2398 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2399
2400 /*
2401 * Filter out the relevant kinds.
2402 */
2403 switch (pPage->enmKind)
2404 {
2405 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2406 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2407 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2408 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2409 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2410 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2411 case PGMPOOLKIND_64BIT_PML4:
2412 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2413 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2414 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2415 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2416 case PGMPOOLKIND_32BIT_PD:
2417 case PGMPOOLKIND_PAE_PDPT:
2418 break;
2419
2420 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2421 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2422 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2423 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2424 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2425 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2426 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2427 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2428 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2429 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2430 case PGMPOOLKIND_ROOT_NESTED:
2431 /* Nothing to monitor here. */
2432 return VINF_SUCCESS;
2433
2434 case PGMPOOLKIND_32BIT_PD_PHYS:
2435 case PGMPOOLKIND_PAE_PDPT_PHYS:
2436 case PGMPOOLKIND_PAE_PD_PHYS:
2437 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2438 /* Nothing to monitor here. */
2439 return VINF_SUCCESS;
2440 default:
2441 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2442 }
2443
2444 /*
2445 * Install handler.
2446 */
2447 int rc;
2448 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2449 if (pPageHead)
2450 {
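        /* Another pool page already monitors this guest page: share its physical handler and just link this page into the monitored chain right after the head. */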
2451 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2452 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2453
2454#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2455 if (pPageHead->fDirty)
2456 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2457#endif
2458
2459 pPage->iMonitoredPrev = pPageHead->idx;
2460 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2461 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2462 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2463 pPageHead->iMonitoredNext = pPage->idx;
2464 rc = VINF_SUCCESS;
2465 }
2466 else
2467 {
2468 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2469 PVM pVM = pPool->CTX_SUFF(pVM);
2470 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2471 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2472 GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK,
2473 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2474 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2475 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2476 pPool->pszAccessHandler);
2477 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2478 * the heap size should suffice. */
2479 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2480 PVMCPU pVCpu = VMMGetCpu(pVM);
2481 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2482 }
2483 pPage->fMonitored = true;
2484 return rc;
2485}
2486
2487
2488/**
2489 * Disables write monitoring of a guest page.
2490 *
2491 * @returns VBox status code.
2492 * @retval VINF_SUCCESS on success.
2493 * @param pPool The pool.
2494 * @param pPage The cached page.
2495 */
2496static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2497{
2498 /*
2499 * Filter out the relevant kinds.
2500 */
2501 switch (pPage->enmKind)
2502 {
2503 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2504 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2505 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2506 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2507 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2508 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2509 case PGMPOOLKIND_64BIT_PML4:
2510 case PGMPOOLKIND_32BIT_PD:
2511 case PGMPOOLKIND_PAE_PDPT:
2512 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2513 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2514 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2515 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2516 break;
2517
2518 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2519 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2520 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2521 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2522 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2523 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2524 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2525 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2526 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2527 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2528 case PGMPOOLKIND_ROOT_NESTED:
2529 case PGMPOOLKIND_PAE_PD_PHYS:
2530 case PGMPOOLKIND_PAE_PDPT_PHYS:
2531 case PGMPOOLKIND_32BIT_PD_PHYS:
2532 /* Nothing to monitor here. */
2533 Assert(!pPage->fMonitored);
2534 return VINF_SUCCESS;
2535
2536 default:
2537 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2538 }
2539 Assert(pPage->fMonitored);
2540
2541 /*
2542 * Remove the page from the monitored list or uninstall it if last.
2543 */
2544 const PVM pVM = pPool->CTX_SUFF(pVM);
2545 int rc;
2546 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2547 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2548 {
2549 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2550 {
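            /* This page is the head of the monitor chain: promote the next page to head and repoint the handler callbacks at it. */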
2551 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2552 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2553 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2554 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2555 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2556 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2557 pPool->pszAccessHandler);
2558 AssertFatalRCSuccess(rc);
2559 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2560 }
2561 else
2562 {
2563 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2564 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2565 {
2566 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2567 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2568 }
2569 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2570 rc = VINF_SUCCESS;
2571 }
2572 }
2573 else
2574 {
2575 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2576 AssertFatalRC(rc);
2577 PVMCPU pVCpu = VMMGetCpu(pVM);
2578 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2579 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2580 }
2581 pPage->fMonitored = false;
2582
2583 /*
2584 * Remove it from the list of modified pages (if in it).
2585 */
2586 pgmPoolMonitorModifiedRemove(pPool, pPage);
2587
2588 return rc;
2589}
2590
2591
2592/**
2593 * Inserts the page into the list of modified pages.
2594 *
2595 * @param pPool The pool.
2596 * @param pPage The page.
2597 */
2598void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2599{
2600 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2601 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2602 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2603 && pPool->iModifiedHead != pPage->idx,
2604 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2605 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2606 pPool->iModifiedHead, pPool->cModifiedPages));
2607
2608 pPage->iModifiedNext = pPool->iModifiedHead;
2609 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2610 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2611 pPool->iModifiedHead = pPage->idx;
2612 pPool->cModifiedPages++;
2613#ifdef VBOX_WITH_STATISTICS
2614 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2615 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2616#endif
2617}
2618
2619
2620/**
2621 * Removes the page from the list of modified pages and resets the
2622 * modification counter.
2623 *
2624 * @param pPool The pool.
2625 * @param pPage The page which is believed to be in the list of modified pages.
2626 */
2627static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2628{
2629 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2630 if (pPool->iModifiedHead == pPage->idx)
2631 {
2632 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2633 pPool->iModifiedHead = pPage->iModifiedNext;
2634 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2635 {
2636 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2637 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2638 }
2639 pPool->cModifiedPages--;
2640 }
2641 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2642 {
2643 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2644 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2645 {
2646 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2647 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2648 }
2649 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2650 pPool->cModifiedPages--;
2651 }
2652 else
2653 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2654 pPage->cModifications = 0;
2655}
2656
2657
2658/**
2659 * Zaps the list of modified pages, resetting their modification counters in the process.
2660 *
2661 * @param pVM The VM handle.
2662 */
2663static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2664{
2665 pgmLock(pVM);
2666 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2667 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2668
2669 unsigned cPages = 0; NOREF(cPages);
2670
2671#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2672 pgmPoolResetDirtyPages(pVM);
2673#endif
2674
2675 uint16_t idx = pPool->iModifiedHead;
2676 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2677 while (idx != NIL_PGMPOOL_IDX)
2678 {
2679 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2680 idx = pPage->iModifiedNext;
2681 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2682 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2683 pPage->cModifications = 0;
2684 Assert(++cPages);
2685 }
2686 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2687 pPool->cModifiedPages = 0;
2688 pgmUnlock(pVM);
2689}
2690
2691
2692/**
2693 * Handle SyncCR3 pool tasks
2694 *
2695 * @returns VBox status code.
2696 * @retval VINF_SUCCESS on success.
2697 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2698 * @param pVCpu The VMCPU handle.
2699 * @remark Should only be used when monitoring is available, thus placed in
2700 * the PGMPOOL_WITH_MONITORING #ifdef.
2701 */
2702int pgmPoolSyncCR3(PVMCPU pVCpu)
2703{
2704 PVM pVM = pVCpu->CTX_SUFF(pVM);
2705 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2706
2707 /*
2708 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2709 * Occasionally we will have to clear all the shadow page tables because we wanted
2710 * to monitor a page which was mapped by too many shadowed page tables. This operation
2711 * is sometimes referred to as a 'lightweight flush'.
2712 */
2713# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2714 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2715 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2716# else /* !IN_RING3 */
2717 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2718 {
2719 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2720 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2721
2722 /* Make sure all other VCPUs return to ring 3. */
2723 if (pVM->cCpus > 1)
2724 {
2725 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2726 PGM_INVL_ALL_VCPU_TLBS(pVM);
2727 }
2728 return VINF_PGM_SYNC_CR3;
2729 }
2730# endif /* !IN_RING3 */
2731 else
2732 {
2733 pgmPoolMonitorModifiedClearAll(pVM);
2734
2735 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2736 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2737 {
2738 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2739 return pgmPoolSyncCR3(pVCpu);
2740 }
2741 }
2742 return VINF_SUCCESS;
2743}
2744
2745
2746/**
2747 * Frees up at least one user entry.
2748 *
2749 * @returns VBox status code.
2750 * @retval VINF_SUCCESS on success.
2751 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2752 * @param pPool The pool.
2753 * @param iUser The user index.
2754 */
2755static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2756{
2757 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2758 /*
2759 * Just free cached pages in a braindead fashion.
2760 */
2761 /** @todo walk the age list backwards and free the first with usage. */
2762 int rc = VINF_SUCCESS;
2763 do
2764 {
2765 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2766 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2767 rc = rc2;
2768 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2769 return rc;
2770}
2771
2772
2773/**
2774 * Inserts a page into the cache.
2775 *
2776 * This will create a user node for the page, insert it into the GCPhys
2777 * hash, and insert it into the age list.
2778 *
2779 * @returns VBox status code.
2780 * @retval VINF_SUCCESS if successfully added.
2781 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2782 * @param pPool The pool.
2783 * @param pPage The cached page.
2784 * @param GCPhys The GC physical address of the page we're gonna shadow.
2785 * @param iUser The user index.
2786 * @param iUserTable The user table index.
2787 */
2788DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2789{
2790 int rc = VINF_SUCCESS;
2791 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2792
2793 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2794
2795#ifdef VBOX_STRICT
2796 /*
2797 * Check that the entry doesn't already exist.
2798 */
2799 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2800 {
2801 uint16_t i = pPage->iUserHead;
2802 do
2803 {
2804 Assert(i < pPool->cMaxUsers);
2805 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2806 i = paUsers[i].iNext;
2807 } while (i != NIL_PGMPOOL_USER_INDEX);
2808 }
2809#endif
2810
2811 /*
2812 * Find a free user node.
2813 */
2814 uint16_t i = pPool->iUserFreeHead;
2815 if (i == NIL_PGMPOOL_USER_INDEX)
2816 {
2817 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2818 if (RT_FAILURE(rc))
2819 return rc;
2820 i = pPool->iUserFreeHead;
2821 }
2822
2823 /*
2824 * Unlink the user node from the free list,
2825 * initialize and insert it into the user list.
2826 */
2827 pPool->iUserFreeHead = paUsers[i].iNext;
2828 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2829 paUsers[i].iUser = iUser;
2830 paUsers[i].iUserTable = iUserTable;
2831 pPage->iUserHead = i;
2832
2833 /*
2834 * Insert into cache and enable monitoring of the guest page if enabled.
2835 *
2836 * Until we implement caching of all levels, including the CR3 one, we'll
2837 * have to make sure we don't try to monitor & cache any recursive reuse of
2838 * a monitored CR3 page. Because all Windows versions are doing this we'll
2839 * have to be able to do combined access monitoring, CR3 + PT and
2840 * PD + PT (guest PAE).
2841 *
2842 * Update:
2843 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2844 */
2845 const bool fCanBeMonitored = true;
2846 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2847 if (fCanBeMonitored)
2848 {
2849 rc = pgmPoolMonitorInsert(pPool, pPage);
2850 AssertRC(rc);
2851 }
2852 return rc;
2853}
2854
2855
2856/**
2857 * Adds a user reference to a page.
2858 *
2859 * This will move the page to the head of the age list.
2860 *
2861 * @returns VBox status code.
2862 * @retval VINF_SUCCESS if successfully added.
2863 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2864 * @param pPool The pool.
2865 * @param pPage The cached page.
2866 * @param iUser The user index.
2867 * @param iUserTable The user table.
2868 */
2869static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2870{
2871 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2872
2873 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2874
2875# ifdef VBOX_STRICT
2876 /*
2877 * Check that the entry doesn't already exist. We only allow multiple
2878 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2879 */
2880 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2881 {
2882 uint16_t i = pPage->iUserHead;
2883 do
2884 {
2885 Assert(i < pPool->cMaxUsers);
2886 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2887 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2888 i = paUsers[i].iNext;
2889 } while (i != NIL_PGMPOOL_USER_INDEX);
2890 }
2891# endif
2892
2893 /*
2894 * Allocate a user node.
2895 */
2896 uint16_t i = pPool->iUserFreeHead;
2897 if (i == NIL_PGMPOOL_USER_INDEX)
2898 {
2899 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2900 if (RT_FAILURE(rc))
2901 return rc;
2902 i = pPool->iUserFreeHead;
2903 }
2904 pPool->iUserFreeHead = paUsers[i].iNext;
2905
2906 /*
2907 * Initialize the user node and insert it.
2908 */
2909 paUsers[i].iNext = pPage->iUserHead;
2910 paUsers[i].iUser = iUser;
2911 paUsers[i].iUserTable = iUserTable;
2912 pPage->iUserHead = i;
2913
2914# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2915 if (pPage->fDirty)
2916 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2917# endif
2918
2919 /*
2920 * Tell the cache to update its replacement stats for this page.
2921 */
2922 pgmPoolCacheUsed(pPool, pPage);
2923 return VINF_SUCCESS;
2924}
2925
2926
2927/**
2928 * Frees a user record associated with a page.
2929 *
2930 * This does not clear the entry in the user table, it simply returns the
2931 * user record to the chain of free records.
2932 *
2933 * @param pPool The pool.
2934 * @param pPage The shadow page.
2935 * @param iUser The shadow page pool index of the user table.
2936 * @param iUserTable The index into the user table (shadowed).
2937 */
2938static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2939{
2940 /*
2941 * Unlink and free the specified user entry.
2942 */
2943 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2944
2945 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2946 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2947 uint16_t i = pPage->iUserHead;
2948 if ( i != NIL_PGMPOOL_USER_INDEX
2949 && paUsers[i].iUser == iUser
2950 && paUsers[i].iUserTable == iUserTable)
2951 {
2952 pPage->iUserHead = paUsers[i].iNext;
2953
2954 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2955 paUsers[i].iNext = pPool->iUserFreeHead;
2956 pPool->iUserFreeHead = i;
2957 return;
2958 }
2959
2960 /* General: Linear search. */
2961 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2962 while (i != NIL_PGMPOOL_USER_INDEX)
2963 {
2964 if ( paUsers[i].iUser == iUser
2965 && paUsers[i].iUserTable == iUserTable)
2966 {
2967 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2968 paUsers[iPrev].iNext = paUsers[i].iNext;
2969 else
2970 pPage->iUserHead = paUsers[i].iNext;
2971
2972 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2973 paUsers[i].iNext = pPool->iUserFreeHead;
2974 pPool->iUserFreeHead = i;
2975 return;
2976 }
2977 iPrev = i;
2978 i = paUsers[i].iNext;
2979 }
2980
2981 /* Fatal: didn't find it */
2982 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
2983 iUser, iUserTable, pPage->GCPhys));
2984}
2985
2986
2987/**
2988 * Gets the entry size of a shadow table.
2989 *
2990 * @param enmKind The kind of page.
2991 *
2992 * @returns The size of the entry in bytes. That is, 4 or 8.
2993 * @returns If the kind is not for a table, an assertion is raised and 0 is
2994 * returned.
2995 */
2996DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2997{
2998 switch (enmKind)
2999 {
3000 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3001 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3002 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3003 case PGMPOOLKIND_32BIT_PD:
3004 case PGMPOOLKIND_32BIT_PD_PHYS:
3005 return 4;
3006
3007 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3008 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3009 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3010 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3011 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3012 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3013 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3014 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3015 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3016 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3017 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3018 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3019 case PGMPOOLKIND_64BIT_PML4:
3020 case PGMPOOLKIND_PAE_PDPT:
3021 case PGMPOOLKIND_ROOT_NESTED:
3022 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3023 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3024 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3025 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3026 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3027 case PGMPOOLKIND_PAE_PD_PHYS:
3028 case PGMPOOLKIND_PAE_PDPT_PHYS:
3029 return 8;
3030
3031 default:
3032 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3033 }
3034}
3035
3036
3037/**
3038 * Gets the entry size of a guest table.
3039 *
3040 * @param enmKind The kind of page.
3041 *
3042 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3043 * @returns If the kind is not for a table, an assertion is raised and 0 is
3044 * returned.
3045 */
3046DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3047{
3048 switch (enmKind)
3049 {
3050 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3051 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3052 case PGMPOOLKIND_32BIT_PD:
3053 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3054 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3055 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3056 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3057 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3058 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3059 return 4;
3060
3061 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3062 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3063 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3064 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3065 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3066 case PGMPOOLKIND_64BIT_PML4:
3067 case PGMPOOLKIND_PAE_PDPT:
3068 return 8;
3069
3070 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3071 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3072 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3073 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3074 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3075 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3076 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3077 case PGMPOOLKIND_ROOT_NESTED:
3078 case PGMPOOLKIND_PAE_PD_PHYS:
3079 case PGMPOOLKIND_PAE_PDPT_PHYS:
3080 case PGMPOOLKIND_32BIT_PD_PHYS:
3081 /** @todo can we return 0? (nobody is calling this...) */
3082 AssertFailed();
3083 return 0;
3084
3085 default:
3086 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3087 }
3088}
3089
3090
3091/**
3092 * Checks one shadow page table entry for a mapping of a physical page.
3093 *
3094 * @returns true / false indicating removal of all relevant PTEs
3095 *
3096 * @param pVM The VM handle.
3097 * @param pPhysPage The guest page in question.
3098 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3099 * @param iShw The shadow page table.
3100 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3101 */
3102static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3103{
3104 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3105 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3106 bool fRet = false;
3107
3108 /*
3109 * Assert sanity.
3110 */
3111 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3112 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3113 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3114
3115 /*
3116 * Then, clear the actual mappings to the page in the shadow PT.
3117 */
3118 switch (pPage->enmKind)
3119 {
3120 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3121 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3122 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3123 {
3124 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3125 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3126 uint32_t u32AndMask = 0;
3127 uint32_t u32OrMask = 0;
3128
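            /* Decide what to do with the PTE: when not forced to flush, either restore write access (no active handler) or clear only the RW bit (write handler); leaving the AND mask at zero means the whole entry gets cleared below. */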
3129 if (!fFlushPTEs)
3130 {
3131 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3132 {
3133 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3134 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3135 u32OrMask = X86_PTE_RW;
3136 u32AndMask = UINT32_MAX;
3137 fRet = true;
3138 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3139 break;
3140
3141 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3142 u32OrMask = 0;
3143 u32AndMask = ~X86_PTE_RW;
3144 fRet = true;
3145 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3146 break;
3147 default:
3148 /* (shouldn't be here, will assert below) */
3149 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3150 break;
3151 }
3152 }
3153 else
3154 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3155
3156 /* Update the counter if we're removing references. */
3157 if (!u32AndMask)
3158 {
3159 Assert(pPage->cPresent );
3160 Assert(pPool->cPresent);
3161 pPage->cPresent--;
3162 pPool->cPresent--;
3163 }
3164
3165 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3166 {
3167 X86PTE Pte;
3168
3169 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3170 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3171 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3172 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3173
3174 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3175 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3176 return fRet;
3177 }
3178#ifdef LOG_ENABLED
3179 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3180 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3181 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3182 {
3183 Log(("i=%d cFound=%d\n", i, ++cFound));
3184 }
3185#endif
3186 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3187 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3188 break;
3189 }
3190
3191 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3192 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3193 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3194 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3195 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3196 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3197 {
3198 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3199 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3200 uint64_t u64OrMask = 0;
3201 uint64_t u64AndMask = 0;
3202
3203 if (!fFlushPTEs)
3204 {
3205 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3206 {
3207 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3208 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3209 u64OrMask = X86_PTE_RW;
3210 u64AndMask = UINT64_MAX;
3211 fRet = true;
3212 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3213 break;
3214
3215 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3216 u64OrMask = 0;
3217 u64AndMask = ~(uint64_t)X86_PTE_RW;
3218 fRet = true;
3219 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3220 break;
3221
3222 default:
3223 /* (shouldn't be here, will assert below) */
3224 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3225 break;
3226 }
3227 }
3228 else
3229 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3230
3231 /* Update the counter if we're removing references. */
3232 if (!u64AndMask)
3233 {
3234 Assert(pPage->cPresent);
3235 Assert(pPool->cPresent);
3236 pPage->cPresent--;
3237 pPool->cPresent--;
3238 }
3239
3240 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3241 {
3242 X86PTEPAE Pte;
3243
3244 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3245 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3246 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3247 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3248
3249 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3250 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3251 return fRet;
3252 }
3253#ifdef LOG_ENABLED
3254 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3255 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3256 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3257 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3258 Log(("i=%d cFound=%d\n", i, ++cFound));
3259#endif
3260 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3261 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3262 break;
3263 }
3264
3265#ifdef PGM_WITH_LARGE_PAGES
3266 /* Large page case only. */
3267 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3268 {
3269 Assert(pVM->pgm.s.fNestedPaging);
3270
3271 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3272 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3273
3274 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3275 {
3276 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3277 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3278 pPD->a[iPte].u = 0;
3279 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3280
3281 /* Update the counter as we're removing references. */
3282 Assert(pPage->cPresent);
3283 Assert(pPool->cPresent);
3284 pPage->cPresent--;
3285 pPool->cPresent--;
3286
3287 return fRet;
3288 }
3289# ifdef LOG_ENABLED
3290 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3291 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3292 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3293 Log(("i=%d cFound=%d\n", i, ++cFound));
3294# endif
3295 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3296 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3297 break;
3298 }
3299
3300 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3301 case PGMPOOLKIND_PAE_PD_PHYS:
3302 {
3303 Assert(pVM->pgm.s.fNestedPaging);
3304
3305 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3306 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3307
3308 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3309 {
3310 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3311 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3312 pPD->a[iPte].u = 0;
3313 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3314
3315 /* Update the counter as we're removing references. */
3316 Assert(pPage->cPresent);
3317 Assert(pPool->cPresent);
3318 pPage->cPresent--;
3319 pPool->cPresent--;
3320 return fRet;
3321 }
3322# ifdef LOG_ENABLED
3323 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3324 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3325 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3326 Log(("i=%d cFound=%d\n", i, ++cFound));
3327# endif
3328 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3329 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3330 break;
3331 }
3332#endif /* PGM_WITH_LARGE_PAGES */
3333
3334 default:
3335 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3336 }
3337 return fRet;
3338}
3339
3340
3341/**
3342 * Scans one shadow page table for mappings of a physical page.
3343 *
3344 * @param pVM The VM handle.
3345 * @param pPhysPage The guest page in question.
3346 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3347 * @param iShw The shadow page table.
3348 */
3349static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3350{
3351 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3352
3353 /* We should only come here when there's only one reference to this physical page. */
3354 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3355
3356 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3357 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3358 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3359 if (!fKeptPTEs)
3360 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3361 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3362}
3363
3364
3365/**
3366 * Flushes a list of shadow page tables mapping the same physical page.
3367 *
3368 * @param pVM The VM handle.
3369 * @param pPhysPage The guest page in question.
3370 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3371 * @param iPhysExt The physical cross reference extent list to flush.
3372 */
3373static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3374{
3375 Assert(PGMIsLockOwner(pVM));
3376 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3377 bool fKeepList = false;
3378
3379 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3380 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3381
3382 const uint16_t iPhysExtStart = iPhysExt;
3383 PPGMPOOLPHYSEXT pPhysExt;
3384 do
3385 {
3386 Assert(iPhysExt < pPool->cMaxPhysExts);
3387 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3388 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3389 {
3390 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3391 {
3392 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3393 if (!fKeptPTEs)
3394 {
3395 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3396 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3397 }
3398 else
3399 fKeepList = true;
3400 }
3401 }
3402 /* next */
3403 iPhysExt = pPhysExt->iNext;
3404 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3405
3406 if (!fKeepList)
3407 {
3408 /* insert the list into the free list and clear the ram range entry. */
3409 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3410 pPool->iPhysExtFreeHead = iPhysExtStart;
3411 /* Invalidate the tracking data. */
3412 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3413 }
3414
3415 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3416}
3417
3418
3419/**
3420 * Flushes all shadow page table mappings of the given guest page.
3421 *
3422 * This is typically called when the host page backing the guest one has been
3423 * replaced or when the page protection was changed due to a guest access
3424 * caught by the monitoring.
3425 *
3426 * @returns VBox status code.
3427 * @retval VINF_SUCCESS if all references have been successfully cleared.
3428 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3429 * pool cleaning. FF and sync flags are set.
3430 *
3431 * @param pVM The VM handle.
3432 * @param GCPhysPage GC physical address of the page in question
3433 * @param pPhysPage The guest page in question.
3434 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3435 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3436 * flushed; it is NOT touched if this isn't necessary.
3437 * The caller MUST initialize this to @a false.
3438 */
3439int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3440{
3441 PVMCPU pVCpu = VMMGetCpu(pVM);
3442 pgmLock(pVM);
3443 int rc = VINF_SUCCESS;
3444
3445#ifdef PGM_WITH_LARGE_PAGES
3446 /* Is this page part of a large page? */
3447 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3448 {
3449 PPGMPAGE pPhysBase;
3450 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3451
3452 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3453
3454 /* Fetch the large page base. */
3455 if (GCPhysBase != GCPhysPage)
3456 {
3457 pPhysBase = pgmPhysGetPage(&pVM->pgm.s, GCPhysBase);
3458 AssertFatal(pPhysBase);
3459 }
3460 else
3461 pPhysBase = pPhysPage;
3462
3463 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3464
3465 if (PGM_PAGE_GET_PDE_TYPE(pPhysBase) == PGM_PAGE_PDE_TYPE_PDE)
3466 {
3467 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3468 PGM_PAGE_SET_PDE_TYPE(pPhysBase, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3469
3470 /* Update the base as *only* that one has a reference and there's only one PDE to clear. */
3471 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pPhysBase, fFlushPTEs, pfFlushTLBs);
3472
3473 *pfFlushTLBs = true;
3474 pgmUnlock(pVM);
3475 return rc;
3476 }
3477 }
3478#else
3479 NOREF(GCPhysPage);
3480#endif /* PGM_WITH_LARGE_PAGES */
3481
3482 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3483 if (u16)
3484 {
3485 /*
3486 * The zero page is currently screwing up the tracking and we'll
3487 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3488 * is defined, zero pages won't normally be mapped. Some kind of solution
3489 * will be needed for this problem of course, but it will have to wait...
3490 */
3491 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3492 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3493 rc = VINF_PGM_GCPHYS_ALIASED;
3494 else
3495 {
3496# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3497 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3498 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3499 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3500# endif
3501
3502 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3503 {
3504 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3505 pgmPoolTrackFlushGCPhysPT(pVM,
3506 pPhysPage,
3507 fFlushPTEs,
3508 PGMPOOL_TD_GET_IDX(u16));
3509 }
3510 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3511 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3512 else
3513 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3514 *pfFlushTLBs = true;
3515
3516# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3517 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3518# endif
3519 }
3520 }
3521
3522 if (rc == VINF_PGM_GCPHYS_ALIASED)
3523 {
3524 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3525 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3526 rc = VINF_PGM_SYNC_CR3;
3527 }
3528 pgmUnlock(pVM);
3529 return rc;
3530}
3531
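/*
 * Illustrative caller sketch for pgmPoolTrackUpdateGCPhys above, assuming
 * hypothetical locals (GCPhysPage, pPhysPage); the pool/VM handles are the
 * usual ones.  Note that pfFlushTLBs MUST be initialized to false by the
 * caller; the function only ever sets it.
 *
 * @code
 *      bool fFlushTLBs = false;                  // caller MUST initialize this to false
 *      // fFlushPTEs=true: zap the shadow PTEs instead of merely adjusting the RW bit.
 *      int rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysPage, pPhysPage, true, &fFlushTLBs);
 *      if (fFlushTLBs)
 *          PGM_INVL_ALL_VCPU_TLBS(pVM);          // only set when a flush is really needed
 *      AssertRC(rc);                             // VINF_PGM_SYNC_CR3 also counts as success
 * @endcode
 */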
3532
3533/**
3534 * Scans all shadow page tables for mappings of a physical page.
3535 *
3536 * This may be slow, but it's most likely more efficient than cleaning
3537 * out the entire page pool / cache.
3538 *
3539 * @returns VBox status code.
3540 * @retval VINF_SUCCESS if all references have been successfully cleared.
3541 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3542 * a page pool cleaning.
3543 *
3544 * @param pVM The VM handle.
3545 * @param pPhysPage The guest page in question.
3546 */
3547int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3548{
3549 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3550 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3551 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3552 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3553
3554 /*
3555 * There is a limit to what makes sense.
3556 */
3557 if ( pPool->cPresent > 1024
3558 && pVM->cCpus == 1)
3559 {
3560 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3561 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3562 return VINF_PGM_GCPHYS_ALIASED;
3563 }
3564
3565 /*
3566 * Iterate all the pages until we've encountered all those in use.
3567 * This is a simple but not quite optimal solution.
3568 */
3569 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3570 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3571 unsigned cLeft = pPool->cUsedPages;
3572 unsigned iPage = pPool->cCurPages;
3573 while (--iPage >= PGMPOOL_IDX_FIRST)
3574 {
3575 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3576 if ( pPage->GCPhys != NIL_RTGCPHYS
3577 && pPage->cPresent)
3578 {
3579 switch (pPage->enmKind)
3580 {
3581 /*
3582 * We only care about shadow page tables.
3583 */
3584 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3585 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3586 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3587 {
3588 unsigned cPresent = pPage->cPresent;
3589 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3590 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3591 if (pPT->a[i].n.u1Present)
3592 {
3593 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3594 {
3595 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3596 pPT->a[i].u = 0;
3597
3598 /* Update the counter as we're removing references. */
3599 Assert(pPage->cPresent);
3600 Assert(pPool->cPresent);
3601 pPage->cPresent--;
3602 pPool->cPresent--;
3603 }
3604 if (!--cPresent)
3605 break;
3606 }
3607 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3608 break;
3609 }
3610
3611 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3612 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3613 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3614 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3615 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3616 {
3617 unsigned cPresent = pPage->cPresent;
3618 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3619 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3620 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3621 {
3622 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3623 {
3624 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3625 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3626
3627 /* Update the counter as we're removing references. */
3628 Assert(pPage->cPresent);
3629 Assert(pPool->cPresent);
3630 pPage->cPresent--;
3631 pPool->cPresent--;
3632 }
3633 if (!--cPresent)
3634 break;
3635 }
3636 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3637 break;
3638 }
3639#ifndef IN_RC
3640 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3641 {
3642 unsigned cPresent = pPage->cPresent;
3643 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3644 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3645 if (pPT->a[i].n.u1Present)
3646 {
3647 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3648 {
3649 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3650 pPT->a[i].u = 0;
3651
3652 /* Update the counter as we're removing references. */
3653 Assert(pPage->cPresent);
3654 Assert(pPool->cPresent);
3655 pPage->cPresent--;
3656 pPool->cPresent--;
3657 }
3658 if (!--cPresent)
3659 break;
3660 }
3661 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3662 break;
3663 }
3664#endif
3665 }
3666 if (!--cLeft)
3667 break;
3668 }
3669 }
3670
3671 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3672 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3673
3674 /*
3675 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3676 */
3677 if (pPool->cPresent > 1024)
3678 {
3679 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3680 return VINF_PGM_GCPHYS_ALIASED;
3681 }
3682
3683 return VINF_SUCCESS;
3684}
3685
3686
3687/**
3688 * Clears the user entry in a user table.
3689 *
3690 * This is used to remove all references to a page when flushing it.
3691 */
3692static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3693{
3694 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3695 Assert(pUser->iUser < pPool->cCurPages);
3696 uint32_t iUserTable = pUser->iUserTable;
3697
3698 /*
3699 * Map the user page.
3700 */
3701 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3702 union
3703 {
3704 uint64_t *pau64;
3705 uint32_t *pau32;
3706 } u;
3707 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3708
3709 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3710
3711 /* Safety precaution in case we change the paging for other modes too in the future. */
3712 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3713
3714#ifdef VBOX_STRICT
3715 /*
3716 * Some sanity checks.
3717 */
3718 switch (pUserPage->enmKind)
3719 {
3720 case PGMPOOLKIND_32BIT_PD:
3721 case PGMPOOLKIND_32BIT_PD_PHYS:
3722 Assert(iUserTable < X86_PG_ENTRIES);
3723 break;
3724 case PGMPOOLKIND_PAE_PDPT:
3725 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3726 case PGMPOOLKIND_PAE_PDPT_PHYS:
3727 Assert(iUserTable < 4);
3728 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3729 break;
3730 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3731 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3732 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3733 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3734 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3735 case PGMPOOLKIND_PAE_PD_PHYS:
3736 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3737 break;
3738 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3739 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3740 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3741 break;
3742 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3743 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3744 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3745 break;
3746 case PGMPOOLKIND_64BIT_PML4:
3747 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3748 /* GCPhys >> PAGE_SHIFT is the index here */
3749 break;
3750 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3751 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3752 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3753 break;
3754
3755 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3756 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3757 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3758 break;
3759
3760 case PGMPOOLKIND_ROOT_NESTED:
3761 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3762 break;
3763
3764 default:
3765 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3766 break;
3767 }
3768#endif /* VBOX_STRICT */
3769
3770 /*
3771 * Clear the entry in the user page.
3772 */
3773 switch (pUserPage->enmKind)
3774 {
3775 /* 32-bit entries */
3776 case PGMPOOLKIND_32BIT_PD:
3777 case PGMPOOLKIND_32BIT_PD_PHYS:
3778 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3779 break;
3780
3781 /* 64-bit entries */
3782 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3783 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3784 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3785 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3786 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3787#ifdef IN_RC
3788 /*
3789 * In 32-bit PAE mode we *must* invalidate the TLB when changing a
3790 * PDPT entry; the CPU fetches them only during cr3 load, so any
3791 * non-present PDPT will continue to cause page faults.
3792 */
3793 ASMReloadCR3();
3794 /* no break */
3795#endif
3796 case PGMPOOLKIND_PAE_PD_PHYS:
3797 case PGMPOOLKIND_PAE_PDPT_PHYS:
3798 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3799 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3800 case PGMPOOLKIND_64BIT_PML4:
3801 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3802 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3803 case PGMPOOLKIND_PAE_PDPT:
3804 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3805 case PGMPOOLKIND_ROOT_NESTED:
3806 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3807 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3808 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3809 break;
3810
3811 default:
3812 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3813 }
3814 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3815}
3816
3817
3818/**
3819 * Clears all users of a page.
3820 */
3821static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3822{
3823 /*
3824 * Free all the user records.
3825 */
3826 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3827
3828 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3829 uint16_t i = pPage->iUserHead;
3830 while (i != NIL_PGMPOOL_USER_INDEX)
3831 {
3832 /* Clear the entry in the user table. */
3833 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3834
3835 /* Free it. */
3836 const uint16_t iNext = paUsers[i].iNext;
3837 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3838 paUsers[i].iNext = pPool->iUserFreeHead;
3839 pPool->iUserFreeHead = i;
3840
3841 /* Next. */
3842 i = iNext;
3843 }
3844 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3845}
3846
3847
3848/**
3849 * Allocates a new physical cross reference extent.
3850 *
3851 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3852 * @param pVM The VM handle.
3853 * @param piPhysExt Where to store the phys ext index.
3854 */
3855PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3856{
3857 Assert(PGMIsLockOwner(pVM));
3858 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3859 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3860 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3861 {
3862 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3863 return NULL;
3864 }
3865 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3866 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3867 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3868 *piPhysExt = iPhysExt;
3869 return pPhysExt;
3870}
3871
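/*
 * Minimal usage sketch for pgmPoolTrackPhysExtAlloc above, assuming
 * hypothetical iShwPT/iPte values naming the referencing shadow page table
 * and PTE; this is roughly the pattern used by pgmPoolTrackPhysExtAddref
 * further down when converting a single reference into an extent list.
 *
 * @code
 *      uint16_t        iPhysExt;
 *      uint16_t        u16New;
 *      PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
 *      if (pPhysExt)
 *      {
 *          pPhysExt->aidx[0] = iShwPT;           // shadow page table index
 *          pPhysExt->apte[0] = iPte;             // PTE index within that table
 *          u16New = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
 *      }
 *      else                                      // out of extents, use the overflow marker
 *          u16New = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
 *      PGM_PAGE_SET_TRACKING(pPhysPage, u16New);
 * @endcode
 */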
3872
3873/**
3874 * Frees a physical cross reference extent.
3875 *
3876 * @param pVM The VM handle.
3877 * @param iPhysExt The extent to free.
3878 */
3879void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3880{
3881 Assert(PGMIsLockOwner(pVM));
3882 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3883 Assert(iPhysExt < pPool->cMaxPhysExts);
3884 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3885 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3886 {
3887 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3888 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3889 }
3890 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3891 pPool->iPhysExtFreeHead = iPhysExt;
3892}
3893
3894
3895/**
3896 * Frees a list of physical cross reference extents.
3897 *
3898 * @param pVM The VM handle.
3899 * @param iPhysExt The first extent in the list to free.
3900 */
3901void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3902{
3903 Assert(PGMIsLockOwner(pVM));
3904 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3905
3906 const uint16_t iPhysExtStart = iPhysExt;
3907 PPGMPOOLPHYSEXT pPhysExt;
3908 do
3909 {
3910 Assert(iPhysExt < pPool->cMaxPhysExts);
3911 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3912 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3913 {
3914 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3915 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3916 }
3917
3918 /* next */
3919 iPhysExt = pPhysExt->iNext;
3920 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3921
3922 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3923 pPool->iPhysExtFreeHead = iPhysExtStart;
3924}
3925
3926
3927/**
3928 * Insert a reference into a list of physical cross reference extents.
3929 *
3930 * @returns The new tracking data for PGMPAGE.
3931 *
3932 * @param pVM The VM handle.
3933 * @param iPhysExt The physical extent index of the list head.
3934 * @param iShwPT The shadow page table index.
3935 * @param iPte Page table entry
3936 *
3937 */
3938static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3939{
3940 Assert(PGMIsLockOwner(pVM));
3941 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3942 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3943
3944 /*
3945 * Special common cases.
3946 */
3947 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3948 {
3949 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3950 paPhysExts[iPhysExt].apte[1] = iPte;
3951 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3952 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3953 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3954 }
3955 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3956 {
3957 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3958 paPhysExts[iPhysExt].apte[2] = iPte;
3959 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3960 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3961 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3962 }
3963 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3964
3965 /*
3966 * General treatment.
3967 */
3968 const uint16_t iPhysExtStart = iPhysExt;
3969 unsigned cMax = 15;
3970 for (;;)
3971 {
3972 Assert(iPhysExt < pPool->cMaxPhysExts);
3973 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3974 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3975 {
3976 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3977 paPhysExts[iPhysExt].apte[i] = iPte;
3978 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3979 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3980 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3981 }
3982 if (!--cMax)
3983 {
3984 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
3985 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3986 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3987 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3988 }
3989
3990 /* advance */
3991 iPhysExt = paPhysExts[iPhysExt].iNext;
3992 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3993 break;
3994 }
3995
3996 /*
3997 * Add another extent to the list.
3998 */
3999 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4000 if (!pNew)
4001 {
4002 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4003 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4004 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4005 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4006 }
4007 pNew->iNext = iPhysExtStart;
4008 pNew->aidx[0] = iShwPT;
4009 pNew->apte[0] = iPte;
4010 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4011 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4012}
4013
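/*
 * Note on capacity: each phys ext node holds three (aidx, apte) pairs and
 * pgmPoolTrackPhysExtInsert above walks at most cMax = 15 nodes, so on the
 * order of 3 * 15 = 45 references can be tracked individually before the
 * PGMPOOL_TD_IDX_OVERFLOWED marker takes over.  Sketch of how the returned
 * tracking word is interpreted (compare pgmPoolTrackUpdateGCPhys):
 *
 * @code
 *      uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
 *      if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
 *          ;   // single reference: PGMPOOL_TD_GET_IDX(u16) is the shadow page table index
 *      else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
 *          ;   // a few references: PGMPOOL_TD_GET_IDX(u16) heads a phys ext list
 *      else
 *          ;   // too many references: only a slow scan of the whole pool finds them all
 * @endcode
 */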
4014
4015/**
4016 * Add a reference to guest physical page where extents are in use.
4017 *
4018 * @returns The new tracking data for PGMPAGE.
4019 *
4020 * @param pVM The VM handle.
4021 * @param pPhysPage Pointer to the aPages entry in the ram range.
4022 * @param u16 The ram range flags (top 16-bits).
4023 * @param iShwPT The shadow page table index.
4024 * @param iPte Page table entry
4025 */
4026uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4027{
4028 pgmLock(pVM);
4029 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4030 {
4031 /*
4032 * Convert to extent list.
4033 */
4034 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4035 uint16_t iPhysExt;
4036 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4037 if (pPhysExt)
4038 {
4039 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4040 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4041 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4042 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4043 pPhysExt->aidx[1] = iShwPT;
4044 pPhysExt->apte[1] = iPte;
4045 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4046 }
4047 else
4048 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4049 }
4050 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4051 {
4052 /*
4053 * Insert into the extent list.
4054 */
4055 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4056 }
4057 else
4058 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4059 pgmUnlock(pVM);
4060 return u16;
4061}
4062
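/*
 * Sketch of how a guest page's 16-bit tracking word evolves as shadow
 * references are added, assuming hypothetical iShwPT0, iShwPT1, iShwPT2 and
 * iPte1, iPte2 identifiers for the referencing shadow page tables and PTEs:
 *
 * @code
 *      uint16_t u16 = PGMPOOL_TD_MAKE(1, iShwPT0);                            // 1st ref: plain index
 *      u16 = pgmPoolTrackPhysExtAddref(pVM, pPhysPage, u16, iShwPT1, iPte1);  // 2nd ref: extent list
 *      u16 = pgmPoolTrackPhysExtAddref(pVM, pPhysPage, u16, iShwPT2, iPte2);  // 3rd ref and onwards
 *      PGM_PAGE_SET_TRACKING(pPhysPage, u16);
 * @endcode
 */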
4063
4064/**
4065 * Clear references to guest physical memory.
4066 *
4067 * @param pPool The pool.
4068 * @param pPage The page.
4069 * @param pPhysPage Pointer to the aPages entry in the ram range.
4070 * @param iPte Shadow PTE index
4071 */
4072void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4073{
4074 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4075 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4076
4077 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4078 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4079 {
4080 PVM pVM = pPool->CTX_SUFF(pVM);
4081 pgmLock(pVM);
4082
4083 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4084 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4085 do
4086 {
4087 Assert(iPhysExt < pPool->cMaxPhysExts);
4088
4089 /*
4090 * Look for the shadow page and check if it's all freed.
4091 */
4092 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4093 {
4094 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4095 && paPhysExts[iPhysExt].apte[i] == iPte)
4096 {
4097 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4098 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4099
4100 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4101 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4102 {
4103 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4104 pgmUnlock(pVM);
4105 return;
4106 }
4107
4108 /* we can free the node. */
4109 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4110 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4111 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4112 {
4113 /* lonely node */
4114 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4115 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4116 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
4117 }
4118 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4119 {
4120 /* head */
4121 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4122 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4123 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4124 }
4125 else
4126 {
4127 /* in list */
4128 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4129 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4130 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4131 }
4132 iPhysExt = iPhysExtNext;
4133 pgmUnlock(pVM);
4134 return;
4135 }
4136 }
4137
4138 /* next */
4139 iPhysExtPrev = iPhysExt;
4140 iPhysExt = paPhysExts[iPhysExt].iNext;
4141 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4142
4143 pgmUnlock(pVM);
4144 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4145 }
4146 else /* nothing to do */
4147 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4148}
4149
4150/**
4151 * Clear references to guest physical memory.
4152 *
4153 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
4154 * is assumed to be correct, so the linear search can be skipped and we can assert
4155 * at an earlier point.
4156 *
4157 * @param pPool The pool.
4158 * @param pPage The page.
4159 * @param HCPhys The host physical address corresponding to the guest page.
4160 * @param GCPhys The guest physical address corresponding to HCPhys.
4161 * @param iPte Shadow PTE index
4162 */
4163static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4164{
4165 /*
4166 * Walk range list.
4167 */
4168 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4169 while (pRam)
4170 {
4171 RTGCPHYS off = GCPhys - pRam->GCPhys;
4172 if (off < pRam->cb)
4173 {
4174 /* does it match? */
4175 const unsigned iPage = off >> PAGE_SHIFT;
4176 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4177#ifdef LOG_ENABLED
4178 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
4179 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4180#endif
4181 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4182 {
4183 Assert(pPage->cPresent);
4184 Assert(pPool->cPresent);
4185 pPage->cPresent--;
4186 pPool->cPresent--;
4187 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4188 return;
4189 }
4190 break;
4191 }
4192 pRam = pRam->CTX_SUFF(pNext);
4193 }
4194 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4195}
4196
4197
4198/**
4199 * Clear references to guest physical memory.
4200 *
4201 * @param pPool The pool.
4202 * @param pPage The page.
4203 * @param HCPhys The host physical address corresponding to the guest page.
4204 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4205 * @param iPte Shadow PTE index
4206 */
4207void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4208{
4209 RTHCPHYS HCPhysExpected = 0xDEADBEEFDEADBEEFULL;
4210
4211 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4212
4213 /*
4214 * Walk range list.
4215 */
4216 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4217 while (pRam)
4218 {
4219 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
4220 if (off < pRam->cb)
4221 {
4222 /* does it match? */
4223 const unsigned iPage = off >> PAGE_SHIFT;
4224 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4225 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4226 {
4227 Assert(pPage->cPresent);
4228 Assert(pPool->cPresent);
4229 pPage->cPresent--;
4230 pPool->cPresent--;
4231 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4232 return;
4233 }
4234 HCPhysExpected = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
4235 break;
4236 }
4237 pRam = pRam->CTX_SUFF(pNext);
4238 }
4239
4240 /*
4241 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4242 */
4243 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4244 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4245 while (pRam)
4246 {
4247 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4248 while (iPage-- > 0)
4249 {
4250 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4251 {
4252 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4253 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4254 Assert(pPage->cPresent);
4255 Assert(pPool->cPresent);
4256 pPage->cPresent--;
4257 pPool->cPresent--;
4258 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4259 return;
4260 }
4261 }
4262 pRam = pRam->CTX_SUFF(pNext);
4263 }
4264
4265 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Expected HCPhys with hint = %RHp)\n", HCPhys, GCPhysHint, HCPhysExpected));
4266}
4267
4268
4269/**
4270 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4271 *
4272 * @param pPool The pool.
4273 * @param pPage The page.
4274 * @param pShwPT The shadow page table (mapping of the page).
4275 * @param pGstPT The guest page table.
4276 */
4277DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4278{
4279 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4280 {
4281 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4282 if (pShwPT->a[i].n.u1Present)
4283 {
4284 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4285 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4286 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4287 if (!pPage->cPresent)
4288 break;
4289 }
4290 }
4291}
4292
4293
4294/**
4295 * Clear references to guest physical memory in a PAE / 32-bit page table.
4296 *
4297 * @param pPool The pool.
4298 * @param pPage The page.
4299 * @param pShwPT The shadow page table (mapping of the page).
4300 * @param pGstPT The guest page table (just a half one).
4301 */
4302DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4303{
4304 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4305 {
4306 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4307 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4308 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4309 {
4310 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4311 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4312 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4313 if (!pPage->cPresent)
4314 break;
4315 }
4316 }
4317}
4318
4319
4320/**
4321 * Clear references to guest physical memory in a PAE / PAE page table.
4322 *
4323 * @param pPool The pool.
4324 * @param pPage The page.
4325 * @param pShwPT The shadow page table (mapping of the page).
4326 * @param pGstPT The guest page table.
4327 */
4328DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4329{
4330 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4331 {
4332 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4333 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4334 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4335 {
4336 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4337 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4338 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
4339 if (!pPage->cPresent)
4340 break;
4341 }
4342 }
4343}
4344
4345
4346/**
4347 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4348 *
4349 * @param pPool The pool.
4350 * @param pPage The page.
4351 * @param pShwPT The shadow page table (mapping of the page).
4352 */
4353DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4354{
4355 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4356 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4357 {
4358 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4359 if (pShwPT->a[i].n.u1Present)
4360 {
4361 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4362 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4363 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys, i);
4364 if (!pPage->cPresent)
4365 break;
4366 }
4367 }
4368}
4369
4370
4371/**
4372 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4373 *
4374 * @param pPool The pool.
4375 * @param pPage The page.
4376 * @param pShwPT The shadow page table (mapping of the page).
4377 */
4378DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4379{
4380 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4381 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4382 {
4383 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4384 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4385 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4386 {
4387 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4388 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4389 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys, i);
4390 if (!pPage->cPresent)
4391 break;
4392 }
4393 }
4394}
4395
4396
4397/**
4398 * Clear references to shadowed pages in an EPT page table.
4399 *
4400 * @param pPool The pool.
4401 * @param pPage The page.
4402 * @param pShwPT The shadow page table (mapping of the page).
4403 */
4404DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4405{
4406 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4407 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4408 {
4409 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4410 if (pShwPT->a[i].n.u1Present)
4411 {
4412 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4413 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4414 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys, i);
4415 if (!pPage->cPresent)
4416 break;
4417 }
4418 }
4419}
4420
4421
4422
4423/**
4424 * Clear references to shadowed pages in a 32-bit page directory.
4425 *
4426 * @param pPool The pool.
4427 * @param pPage The page.
4428 * @param pShwPD The shadow page directory (mapping of the page).
4429 */
4430DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4431{
4432 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4433 {
4434 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4435 if ( pShwPD->a[i].n.u1Present
4436 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4437 )
4438 {
4439 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4440 if (pSubPage)
4441 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4442 else
4443 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4444 }
4445 }
4446}
4447
4448/**
4449 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4450 *
4451 * @param pPool The pool.
4452 * @param pPage The page.
4453 * @param pShwPD The shadow page directory (mapping of the page).
4454 */
4455DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4456{
4457 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4458 {
4459 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4460 if ( pShwPD->a[i].n.u1Present
4461 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4462 {
4463#ifdef PGM_WITH_LARGE_PAGES
4464 if (pShwPD->a[i].b.u1Size)
4465 {
4466 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4467 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4468 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4469 }
4470 else
4471#endif
4472 {
4473 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4474 if (pSubPage)
4475 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4476 else
4477 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4478 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4479 }
4480 }
4481 }
4482}
4483
4484/**
4485 * Clear references to shadowed pages in a PAE page directory pointer table.
4486 *
4487 * @param pPool The pool.
4488 * @param pPage The page.
4489 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4490 */
4491DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4492{
4493 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4494 {
4495 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4496 if ( pShwPDPT->a[i].n.u1Present
4497 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4498 )
4499 {
4500 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4501 if (pSubPage)
4502 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4503 else
4504 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4505 }
4506 }
4507}
4508
4509
4510/**
4511 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4512 *
4513 * @param pPool The pool.
4514 * @param pPage The page.
4515 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4516 */
4517DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4518{
4519 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4520 {
4521 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4522 if (pShwPDPT->a[i].n.u1Present)
4523 {
4524 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4525 if (pSubPage)
4526 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4527 else
4528 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4529 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4530 }
4531 }
4532}
4533
4534
4535/**
4536 * Clear references to shadowed pages in a 64-bit level 4 page table.
4537 *
4538 * @param pPool The pool.
4539 * @param pPage The page.
4540 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4541 */
4542DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4543{
4544 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4545 {
4546 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4547 if (pShwPML4->a[i].n.u1Present)
4548 {
4549 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4550 if (pSubPage)
4551 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4552 else
4553 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4554 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4555 }
4556 }
4557}
4558
4559
4560/**
4561 * Clear references to shadowed pages in an EPT page directory.
4562 *
4563 * @param pPool The pool.
4564 * @param pPage The page.
4565 * @param pShwPD The shadow page directory (mapping of the page).
4566 */
4567DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4568{
4569 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4570 {
4571 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4572 if (pShwPD->a[i].n.u1Present)
4573 {
4574#ifdef PGM_WITH_LARGE_PAGES
4575 if (pShwPD->a[i].b.u1Size)
4576 {
4577 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4578 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4579 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4580 }
4581 else
4582#endif
4583 {
4584 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4585 if (pSubPage)
4586 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4587 else
4588 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4589 }
4590 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4591 }
4592 }
4593}
4594
4595
4596/**
4597 * Clear references to shadowed pages in an EPT page directory pointer table.
4598 *
4599 * @param pPool The pool.
4600 * @param pPage The page.
4601 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4602 */
4603DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4604{
4605 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4606 {
4607 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4608 if (pShwPDPT->a[i].n.u1Present)
4609 {
4610 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4611 if (pSubPage)
4612 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4613 else
4614 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4615 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4616 }
4617 }
4618}
4619
4620
4621/**
4622 * Clears all references made by this page.
4623 *
4624 * This includes other shadow pages and GC physical addresses.
4625 *
4626 * @param pPool The pool.
4627 * @param pPage The page.
4628 */
4629static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4630{
4631 /*
4632 * Map the shadow page and take action according to the page kind.
4633 */
4634 PVM pVM = pPool->CTX_SUFF(pVM);
4635 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4636 switch (pPage->enmKind)
4637 {
4638 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4639 {
4640 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4641 void *pvGst;
4642 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4643 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4644 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4645 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4646 break;
4647 }
4648
4649 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4650 {
4651 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4652 void *pvGst;
4653 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4654 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4655 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4656 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4657 break;
4658 }
4659
4660 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4661 {
4662 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4663 void *pvGst;
4664 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4665 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4666 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4667 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4668 break;
4669 }
4670
4671 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4672 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4673 {
4674 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4675 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4676 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4677 break;
4678 }
4679
4680 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4681 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4682 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4683 {
4684 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4685 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4686 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4687 break;
4688 }
4689
4690 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4691 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4692 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4693 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4694 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4695 case PGMPOOLKIND_PAE_PD_PHYS:
4696 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4697 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4698 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4699 break;
4700
4701 case PGMPOOLKIND_32BIT_PD_PHYS:
4702 case PGMPOOLKIND_32BIT_PD:
4703 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4704 break;
4705
4706 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4707 case PGMPOOLKIND_PAE_PDPT:
4708 case PGMPOOLKIND_PAE_PDPT_PHYS:
4709 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4710 break;
4711
4712 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4713 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4714 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4715 break;
4716
4717 case PGMPOOLKIND_64BIT_PML4:
4718 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4719 break;
4720
4721 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4722 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4723 break;
4724
4725 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4726 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4727 break;
4728
4729 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4730 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4731 break;
4732
4733 default:
4734 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4735 }
4736
4737 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4738 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4739 ASMMemZeroPage(pvShw);
4740 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4741 pPage->fZeroed = true;
4742 Assert(!pPage->cPresent);
4743 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4744}
4745
4746/**
4747 * Flushes a pool page.
4748 *
4749 * This moves the page to the free list after removing all user references to it.
4750 *
4751 * @returns VBox status code.
4752 * @retval VINF_SUCCESS on success.
4753 * @param pPool The pool.
4754 * @param pPage The shadow page.
4755 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4756 */
4757int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4758{
4759 PVM pVM = pPool->CTX_SUFF(pVM);
4760 bool fFlushRequired = false;
4761
4762 int rc = VINF_SUCCESS;
4763 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4764 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4765 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4766
4767 /*
4768 * Quietly reject any attempts at flushing any of the special root pages.
4769 */
4770 if (pPage->idx < PGMPOOL_IDX_FIRST)
4771 {
4772 AssertFailed(); /* can no longer happen */
4773 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4774 return VINF_SUCCESS;
4775 }
4776
4777 pgmLock(pVM);
4778
4779 /*
4780 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4781 */
4782 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4783 {
4784 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4785 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4786 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4787 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4788 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4789 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4790 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4791 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4792 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4793 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4794 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4795 pgmUnlock(pVM);
4796 return VINF_SUCCESS;
4797 }
4798
4799#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4800 /* Start a subset so we won't run out of mapping space. */
4801 PVMCPU pVCpu = VMMGetCpu(pVM);
4802 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4803#endif
4804
4805 /*
4806 * Mark the page as being in need of an ASMMemZeroPage().
4807 */
4808 pPage->fZeroed = false;
4809
4810#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4811 if (pPage->fDirty)
4812 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4813#endif
4814
4815 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4816 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4817 fFlushRequired = true;
4818
4819 /*
4820 * Clear the page.
4821 */
4822 pgmPoolTrackClearPageUsers(pPool, pPage);
4823 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4824 pgmPoolTrackDeref(pPool, pPage);
4825 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4826
4827 /*
4828 * Flush it from the cache.
4829 */
4830 pgmPoolCacheFlushPage(pPool, pPage);
4831
4832#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4833 /* Heavy stuff done. */
4834 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4835#endif
4836
4837 /*
4838 * Deregister the monitoring.
4839 */
4840 if (pPage->fMonitored)
4841 rc = pgmPoolMonitorFlush(pPool, pPage);
4842
4843 /*
4844 * Free the page.
4845 */
4846 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4847 pPage->iNext = pPool->iFreeHead;
4848 pPool->iFreeHead = pPage->idx;
4849 pPage->enmKind = PGMPOOLKIND_FREE;
4850 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4851 pPage->GCPhys = NIL_RTGCPHYS;
4852 pPage->fReusedFlushPending = false;
4853
4854 pPool->cUsedPages--;
4855
4856 /* Flush the TLBs of all VCPUs if required. */
4857 if ( fFlushRequired
4858 && fFlush)
4859 {
4860 PGM_INVL_ALL_VCPU_TLBS(pVM);
4861 }
4862
4863 pgmUnlock(pVM);
4864 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4865 return rc;
4866}
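/*
 * Editor's note (illustrative, not part of the original source): the pool links
 * free pages together by array index (iNext/iFreeHead) instead of pointers, so
 * the same structures can be used unchanged in ring-3, ring-0 and raw-mode
 * context.  The stand-alone sketch below, guarded by #if 0 so it is never built,
 * mirrors the free-list push performed at the tail of pgmPoolFlushPage above.
 * All names starting with EX or ex are hypothetical and exist only for this sketch.
 */
#if 0
# include <stdint.h>

# define EX_NIL_IDX UINT16_MAX /* stand-in for NIL_PGMPOOL_IDX */

typedef struct EXPOOLPAGE
{
    uint16_t idx;   /* this page's index in the pool array */
    uint16_t iNext; /* next free page, or EX_NIL_IDX */
} EXPOOLPAGE;

typedef struct EXPOOL
{
    uint16_t   iFreeHead;  /* head of the free list, or EX_NIL_IDX */
    uint32_t   cUsedPages; /* number of pages currently in use */
    EXPOOLPAGE aPages[64]; /* the page descriptors */
} EXPOOL;

/** Returns a page to the free list, the same way the tail of pgmPoolFlushPage does above. */
static void exPoolFreeListPush(EXPOOL *pPool, EXPOOLPAGE *pPage)
{
    pPage->iNext     = pPool->iFreeHead; /* the freed page points at the old head */
    pPool->iFreeHead = pPage->idx;       /* and becomes the new head */
    pPool->cUsedPages--;                 /* one page less in use */
}
#endif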
4867
4868
4869/**
4870 * Frees a usage of a pool page.
4871 *
4872 * The caller is responsible for updating the user table so that it no longer
4873 * references the shadow page.
4874 *
4875 * @param pPool The pool.
4876 * @param pPage The shadow page.
4877 * @param iUser The shadow page pool index of the user table.
4878 * @param iUserTable The index into the user table (shadowed).
4879 */
4880void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4881{
4882 PVM pVM = pPool->CTX_SUFF(pVM);
4883
4884 STAM_PROFILE_START(&pPool->StatFree, a);
4885 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4886 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4887 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4888 pgmLock(pVM);
4889 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4890 if (!pPage->fCached)
4891 pgmPoolFlushPage(pPool, pPage);
4892 pgmUnlock(pVM);
4893 STAM_PROFILE_STOP(&pPool->StatFree, a);
4894}
4895
4896
4897/**
4898 * Makes one or more free pages available.
4899 *
4900 * @returns VBox status code.
4901 * @retval VINF_SUCCESS on success.
4902 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4903 *
4904 * @param pPool The pool.
4905 * @param enmKind Page table kind
4906 * @param iUser The user of the page.
4907 */
4908static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4909{
4910 PVM pVM = pPool->CTX_SUFF(pVM);
4911
4912 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%d\n", iUser));
4913
4914 /*
4915 * If the pool isn't fully grown yet, expand it.
4916 */
4917 if ( pPool->cCurPages < pPool->cMaxPages
4918#if defined(IN_RC)
4919 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4920 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4921 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4922#endif
4923 )
4924 {
4925 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4926#ifdef IN_RING3
4927 int rc = PGMR3PoolGrow(pVM);
4928#else
4929 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4930#endif
4931 if (RT_FAILURE(rc))
4932 return rc;
4933 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4934 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4935 return VINF_SUCCESS;
4936 }
4937
4938 /*
4939 * Free one cached page.
4940 */
4941 return pgmPoolCacheFreeOne(pPool, iUser);
4942}
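/*
 * Editor's note (illustrative, not part of the original source): the function
 * above prefers growing the pool and only evicts a cached page once the pool
 * has reached its configured maximum.  A minimal, self-contained sketch of that
 * "grow first, evict second" policy follows; it is guarded by #if 0, all names
 * starting with EX or ex are hypothetical, and the two helpers are stubbed out
 * purely so the sketch compiles on its own.
 */
#if 0
# include <stdint.h>

# define EX_NIL_IDX UINT16_MAX

typedef struct EXPOOLHDR
{
    uint16_t iFreeHead; /* head of the free list, or EX_NIL_IDX */
    uint16_t cCurPages; /* pages currently allocated to the pool */
    uint16_t cMaxPages; /* configured upper limit */
} EXPOOLHDR;

/* Hypothetical helper: grow the pool by one page (stubbed for the sketch). */
static int exPoolGrow(EXPOOLHDR *pPool)
{
    pPool->cCurPages++;                       /* pretend a page was added ... */
    pPool->iFreeHead = pPool->cCurPages - 1;  /* ... and put it on the free list */
    return 0;
}

/* Hypothetical helper: evict the least recently used cached page (stubbed). */
static int exPoolCacheEvictOne(EXPOOLHDR *pPool)
{
    pPool->iFreeHead = 0; /* pretend page 0 was evicted and is free again */
    return 0;
}

static int exPoolMakeMoreFree(EXPOOLHDR *pPool)
{
    /* 1. Grow while below the configured maximum. */
    if (pPool->cCurPages < pPool->cMaxPages)
    {
        int rc = exPoolGrow(pPool);
        if (rc != 0)
            return rc;                        /* growing failed, give up */
        if (pPool->iFreeHead != EX_NIL_IDX)
            return 0;                         /* growing produced free pages, done */
    }

    /* 2. Already at the limit (or growing yielded nothing): evict a cached page. */
    return exPoolCacheEvictOne(pPool);
}
#endif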
4943
4944/**
4945 * Allocates a page from the pool.
4946 *
4947 * This page may actually be a cached page and not in need of any processing
4948 * on the caller's part.
4949 *
4950 * @returns VBox status code.
4951 * @retval VINF_SUCCESS if a NEW page was allocated.
4952 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4953 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4954 * @param pVM The VM handle.
4955 * @param GCPhys The GC physical address of the page we're going to shadow.
4956 * For 4MB and 2MB PD entries, it's the first address the
4957 * shadow PT is covering.
4958 * @param enmKind The kind of mapping.
4959 * @param enmAccess Access type for the mapping (only relevant for big pages)
4960 * @param iUser The shadow page pool index of the user table.
4961 * @param iUserTable The index into the user table (shadowed).
4962 * @param fLockPage Lock the page
4963 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4964 */
4965int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable,
4966 bool fLockPage, PPPGMPOOLPAGE ppPage)
4967{
4968 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4969 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4970 LogFlow(("pgmPoolAllocEx: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4971 *ppPage = NULL;
4972 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4973 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4974 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4975
4976 pgmLock(pVM);
4977
4978 if (pPool->fCacheEnabled)
4979 {
4980 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4981 if (RT_SUCCESS(rc2))
4982 {
4983 if (fLockPage)
4984 pgmPoolLockPage(pPool, *ppPage);
4985 pgmUnlock(pVM);
4986 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4987 LogFlow(("pgmPoolAllocEx: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4988 return rc2;
4989 }
4990 }
4991
4992 /*
4993 * Allocate a new one.
4994 */
4995 int rc = VINF_SUCCESS;
4996 uint16_t iNew = pPool->iFreeHead;
4997 if (iNew == NIL_PGMPOOL_IDX)
4998 {
4999 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5000 if (RT_FAILURE(rc))
5001 {
5002 pgmUnlock(pVM);
5003 Log(("pgmPoolAllocEx: returns %Rrc (Free)\n", rc));
5004 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5005 return rc;
5006 }
5007 iNew = pPool->iFreeHead;
5008 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
5009 }
5010
5011 /* unlink the free head */
5012 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5013 pPool->iFreeHead = pPage->iNext;
5014 pPage->iNext = NIL_PGMPOOL_IDX;
5015
5016 /*
5017 * Initialize it.
5018 */
5019 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5020 pPage->enmKind = enmKind;
5021 pPage->enmAccess = enmAccess;
5022 pPage->GCPhys = GCPhys;
5023 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5024 pPage->fMonitored = false;
5025 pPage->fCached = false;
5026#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5027 pPage->fDirty = false;
5028#endif
5029 pPage->fReusedFlushPending = false;
5030 pPage->cModifications = 0;
5031 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5032 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5033 pPage->cLocked = 0;
5034 pPage->cPresent = 0;
5035 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5036 pPage->pvLastAccessHandlerFault = 0;
5037 pPage->cLastAccessHandlerCount = 0;
5038 pPage->pvLastAccessHandlerRip = 0;
5039
5040 /*
5041 * Insert into the tracking and cache. If this fails, free the page.
5042 */
5043 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5044 if (RT_FAILURE(rc3))
5045 {
5046 pPool->cUsedPages--;
5047 pPage->enmKind = PGMPOOLKIND_FREE;
5048 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5049 pPage->GCPhys = NIL_RTGCPHYS;
5050 pPage->iNext = pPool->iFreeHead;
5051 pPool->iFreeHead = pPage->idx;
5052 pgmUnlock(pVM);
5053 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5054 Log(("pgmPoolAllocEx: returns %Rrc (Insert)\n", rc3));
5055 return rc3;
5056 }
5057
5058 /*
5059 * Commit the allocation, clear the page and return.
5060 */
5061#ifdef VBOX_WITH_STATISTICS
5062 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5063 pPool->cUsedPagesHigh = pPool->cUsedPages;
5064#endif
5065
5066 if (!pPage->fZeroed)
5067 {
5068 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5069 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5070 ASMMemZeroPage(pv);
5071 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5072 }
5073
5074 *ppPage = pPage;
5075 if (fLockPage)
5076 pgmPoolLockPage(pPool, pPage);
5077 pgmUnlock(pVM);
5078 LogFlow(("pgmPoolAllocEx: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5079 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5080 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5081 return rc;
5082}
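/*
 * Editor's note (illustrative, not part of the original source): allocation pops
 * the head of the index-linked free list and only zeroes the backing page when
 * the fZeroed flag says it was not already cleared when it was flushed.  A
 * self-contained sketch of that pop + lazy-zero pattern follows (guarded by
 * #if 0; all names starting with EX or ex are hypothetical).
 */
#if 0
# include <stdint.h>
# include <stddef.h>
# include <string.h>

# define EX_NIL_IDX   UINT16_MAX
# define EX_PAGE_SIZE 4096

typedef struct EXALLOCPAGE
{
    uint16_t idx;     /* this page's index in the pool array */
    uint16_t iNext;   /* next free page, or EX_NIL_IDX */
    int      fZeroed; /* set when the backing memory is known to be zero */
    uint8_t *pvPage;  /* the backing shadow page memory */
} EXALLOCPAGE;

typedef struct EXALLOCPOOL
{
    uint16_t    iFreeHead; /* head of the free list, or EX_NIL_IDX */
    EXALLOCPAGE aPages[64];
} EXALLOCPOOL;

/** Pops a page off the free list and hands it out zeroed. */
static EXALLOCPAGE *exPoolAllocPage(EXALLOCPOOL *pPool)
{
    uint16_t iNew = pPool->iFreeHead;
    if (iNew == EX_NIL_IDX)
        return NULL; /* caller must make more pages free first */

    /* Unlink the free head. */
    EXALLOCPAGE *pPage = &pPool->aPages[iNew];
    pPool->iFreeHead = pPage->iNext;
    pPage->iNext     = EX_NIL_IDX;

    /* Zero lazily: skip the memset when the page is already known to be clean. */
    if (!pPage->fZeroed)
    {
        memset(pPage->pvPage, 0, EX_PAGE_SIZE);
        pPage->fZeroed = 1;
    }
    return pPage;
}
#endif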
5083
5084
5085/**
5086 * Frees a usage of a pool page.
5087 *
5088 * @param pVM The VM handle.
5089 * @param HCPhys The HC physical address of the shadow page.
5090 * @param iUser The shadow page pool index of the user table.
5091 * @param iUserTable The index into the user table (shadowed).
5092 */
5093void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5094{
5095 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5096 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5097 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5098}
5099
5100/**
5101 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5102 *
5103 * @returns Pointer to the shadow page structure.
5104 * @param pPool The pool.
5105 * @param HCPhys The HC physical address of the shadow page.
5106 */
5107PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5108{
5109 PVM pVM = pPool->CTX_SUFF(pVM);
5110
5111 Assert(PGMIsLockOwner(pVM));
5112
5113 /*
5114 * Look up the page.
5115 */
5116 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5117
5118 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5119 return pPage;
5120}
5121
5122
5123/**
5124 * Internal worker for finding a page for debugging purposes, no assertions.
5125 *
5126 * @returns Pointer to the shadow page structure, or NULL if not found.
5127 * @param pPool The pool.
5128 * @param HCPhys The HC physical address of the shadow page.
5129 */
5130PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5131{
5132 PVM pVM = pPool->CTX_SUFF(pVM);
5133 Assert(PGMIsLockOwner(pVM));
5134 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5135}
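/*
 * Editor's note (illustrative, not part of the original source): both lookup
 * functions above mask the low bits off the host physical address so that any
 * address inside a shadow page maps to the same 4KB-aligned key before the AVL
 * tree is consulted.  The self-contained sketch below shows the same masking
 * idea, with a plain linear search standing in for the tree (guarded by #if 0;
 * all names starting with EX or ex are hypothetical).
 */
#if 0
# include <stdint.h>
# include <stddef.h>

# define EX_PAGE_BASE_MASK (~(uint64_t)0xfff) /* keep only the 4KB-aligned page address */

typedef struct EXKEYEDPAGE
{
    uint64_t Key;    /* 4KB-aligned host physical address of the shadow page */
    int      fInUse; /* whether the page is currently allocated */
} EXKEYEDPAGE;

/** Debug-style lookup: returns NULL instead of asserting when nothing matches. */
static const EXKEYEDPAGE *exPoolLookupByHCPhys(const EXKEYEDPAGE *paPages, size_t cPages, uint64_t HCPhys)
{
    uint64_t Key = HCPhys & EX_PAGE_BASE_MASK; /* same idea as masking with X86_PTE_PAE_PG_MASK */
    for (size_t i = 0; i < cPages; i++)
        if (paPages[i].fInUse && paPages[i].Key == Key)
            return &paPages[i];
    return NULL; /* not found */
}
#endif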
5136
5137
5138#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5139/**
5140 * Flush the specified page if present
5141 *
5142 * @param pVM The VM handle.
5143 * @param GCPhys Guest physical address of the page to flush
5144 */
5145void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5146{
5147 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5148
5149 VM_ASSERT_EMT(pVM);
5150
5151 /*
5152 * Look up the GCPhys in the hash.
5153 */
5154 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5155 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5156 if (i == NIL_PGMPOOL_IDX)
5157 return;
5158
5159 do
5160 {
5161 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5162 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5163 {
5164 switch (pPage->enmKind)
5165 {
5166 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5167 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5168 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5169 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5170 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5171 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5172 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5173 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5174 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5175 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5176 case PGMPOOLKIND_64BIT_PML4:
5177 case PGMPOOLKIND_32BIT_PD:
5178 case PGMPOOLKIND_PAE_PDPT:
5179 {
5180 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5181#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5182 if (pPage->fDirty)
5183 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5184 else
5185#endif
5186 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5187 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
5188 pgmPoolMonitorChainFlush(pPool, pPage);
5189 return;
5190 }
5191
5192 /* ignore, no monitoring. */
5193 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5194 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5195 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5196 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5197 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5198 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5199 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5200 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5201 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5202 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5203 case PGMPOOLKIND_ROOT_NESTED:
5204 case PGMPOOLKIND_PAE_PD_PHYS:
5205 case PGMPOOLKIND_PAE_PDPT_PHYS:
5206 case PGMPOOLKIND_32BIT_PD_PHYS:
5207 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5208 break;
5209
5210 default:
5211 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5212 }
5213 }
5214
5215 /* next */
5216 i = pPage->iNext;
5217 } while (i != NIL_PGMPOOL_IDX);
5218 return;
5219}
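/*
 * Editor's note (illustrative, not part of the original source): the loop above
 * is a hash lookup with per-bucket chains: the guest physical address selects a
 * bucket in aiHash and the pages in that bucket are chained through their iNext
 * fields.  A self-contained sketch of the same walk follows (guarded by #if 0;
 * all names starting with EX or ex are hypothetical, and exHash is only a
 * stand-in for the real PGMPOOL_HASH macro).
 */
#if 0
# include <stdint.h>
# include <stddef.h>

# define EX_NIL_IDX   UINT16_MAX
# define EX_HASH_SIZE 64
# define EX_PAGE_SIZE 4096

typedef struct EXHASHPAGE
{
    uint64_t GCPhys; /* guest physical address this shadow page covers */
    uint16_t iNext;  /* next page in the same hash bucket, or EX_NIL_IDX */
} EXHASHPAGE;

typedef struct EXHASHPOOL
{
    uint16_t   aiHash[EX_HASH_SIZE]; /* bucket heads, EX_NIL_IDX when empty */
    EXHASHPAGE aPages[256];
} EXHASHPOOL;

static unsigned exHash(uint64_t GCPhys)
{
    return (unsigned)(GCPhys >> 12) % EX_HASH_SIZE; /* hypothetical hash function */
}

/** Finds a shadow page covering GCPhys, or returns NULL. */
static EXHASHPAGE *exPoolHashLookup(EXHASHPOOL *pPool, uint64_t GCPhys)
{
    GCPhys &= ~(uint64_t)(EX_PAGE_SIZE - 1);    /* page-align the key */
    uint16_t i = pPool->aiHash[exHash(GCPhys)]; /* bucket head */
    while (i != EX_NIL_IDX)
    {
        EXHASHPAGE *pPage = &pPool->aPages[i];
        if (pPage->GCPhys - GCPhys < EX_PAGE_SIZE) /* same page test as the loop above */
            return pPage;
        i = pPage->iNext;                          /* follow the chain */
    }
    return NULL;
}
#endif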
5220#endif /* IN_RING3 */
5221
5222#ifdef IN_RING3
5223
5224
5225/**
5226 * Reset CPU on hot plugging.
5227 *
5228 * @param pVM The VM handle.
5229 * @param pVCpu The virtual CPU.
5230 */
5231void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5232{
5233 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5234
5235 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5236 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5237 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5238}
5239
5240
5241/**
5242 * Flushes the entire cache.
5243 *
5244 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5245 * this and will execute the CR3 flush.
5246 *
5247 * @param pVM The VM handle.
5248 */
5249void pgmR3PoolReset(PVM pVM)
5250{
5251 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5252
5253 Assert(PGMIsLockOwner(pVM));
5254 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5255 LogFlow(("pgmR3PoolReset:\n"));
5256
5257 /*
5258 * If there are no pages in the pool, there is nothing to do.
5259 */
5260 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5261 {
5262 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5263 return;
5264 }
5265
5266 /*
5267 * Exit the shadow mode since we're going to clear everything,
5268 * including the root page.
5269 */
5270 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5271 {
5272 PVMCPU pVCpu = &pVM->aCpus[i];
5273 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5274 }
5275
5276 /*
5277 * Nuke the free list and reinsert all pages into it.
5278 */
5279 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5280 {
5281 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5282
5283 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5284 if (pPage->fMonitored)
5285 pgmPoolMonitorFlush(pPool, pPage);
5286 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5287 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5288 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5289 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5290 pPage->cModifications = 0;
5291 pPage->GCPhys = NIL_RTGCPHYS;
5292 pPage->enmKind = PGMPOOLKIND_FREE;
5293 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5294 Assert(pPage->idx == i);
5295 pPage->iNext = i + 1;
5296 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5297 pPage->fSeenNonGlobal = false;
5298 pPage->fMonitored = false;
5299#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5300 pPage->fDirty = false;
5301#endif
5302 pPage->fCached = false;
5303 pPage->fReusedFlushPending = false;
5304 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5305 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5306 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5307 pPage->cLocked = 0;
5308 }
5309 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5310 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5311 pPool->cUsedPages = 0;
5312
5313 /*
5314 * Zap and reinitialize the user records.
5315 */
5316 pPool->cPresent = 0;
5317 pPool->iUserFreeHead = 0;
5318 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5319 const unsigned cMaxUsers = pPool->cMaxUsers;
5320 for (unsigned i = 0; i < cMaxUsers; i++)
5321 {
5322 paUsers[i].iNext = i + 1;
5323 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5324 paUsers[i].iUserTable = 0xfffffffe;
5325 }
5326 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5327
5328 /*
5329 * Clear all the GCPhys links and rebuild the phys ext free list.
5330 */
5331 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
5332 pRam;
5333 pRam = pRam->CTX_SUFF(pNext))
5334 {
5335 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5336 while (iPage-- > 0)
5337 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
5338 }
5339
5340 pPool->iPhysExtFreeHead = 0;
5341 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5342 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5343 for (unsigned i = 0; i < cMaxPhysExts; i++)
5344 {
5345 paPhysExts[i].iNext = i + 1;
5346 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5347 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5348 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5349 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5350 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5351 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5352 }
5353 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5354
5355 /*
5356 * Just zap the modified list.
5357 */
5358 pPool->cModifiedPages = 0;
5359 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5360
5361 /*
5362 * Clear the GCPhys hash and the age list.
5363 */
5364 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5365 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5366 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5367 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5368
5369#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5370 /* Clear all dirty pages. */
5371 pPool->idxFreeDirtyPage = 0;
5372 pPool->cDirtyPages = 0;
5373 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5374 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5375#endif
5376
5377 /*
5378 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5379 */
5380 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5381 {
5382 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5383 pPage->iNext = NIL_PGMPOOL_IDX;
5384 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5385 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5386 pPage->cModifications = 0;
5387 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5388 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5389 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5390 if (pPage->fMonitored)
5391 {
5392 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
5393 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5394 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5395 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5396 pPool->pszAccessHandler);
5397 AssertFatalRCSuccess(rc);
5398 pgmPoolHashInsert(pPool, pPage);
5399 }
5400 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5401 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5402 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5403 }
5404
5405 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5406 {
5407 /*
5408 * Re-enter the shadowing mode and assert Sync CR3 FF.
5409 */
5410 PVMCPU pVCpu = &pVM->aCpus[i];
5411 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5412 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5413 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5414 }
5415
5416 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5417}
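/*
 * Editor's note (illustrative, not part of the original source): several of the
 * reset steps above rebuild an index-linked free list over an array (the page
 * array, the user records and the phys-ext records all follow the same shape:
 * entry i points at entry i + 1 and the last entry is terminated with a NIL
 * index).  A self-contained sketch of that pattern follows (guarded by #if 0;
 * all names starting with EX or ex are hypothetical).
 */
#if 0
# include <stdint.h>

# define EX_NIL_IDX UINT16_MAX

/** Chains cEntries next-indices into one free list and resets the head to 0. */
static void exInitIndexFreeList(uint16_t *paiNext, unsigned cEntries, uint16_t *piFreeHead)
{
    for (unsigned i = 0; i < cEntries; i++)
        paiNext[i] = (uint16_t)(i + 1); /* each entry points at its successor */
    paiNext[cEntries - 1] = EX_NIL_IDX; /* terminate the list */
    *piFreeHead = 0;                    /* the whole array is free again */
}
#endif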
5418#endif /* IN_RING3 */
5419
5420#ifdef LOG_ENABLED
5421static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5422{
5423 switch(enmKind)
5424 {
5425 case PGMPOOLKIND_INVALID:
5426 return "PGMPOOLKIND_INVALID";
5427 case PGMPOOLKIND_FREE:
5428 return "PGMPOOLKIND_FREE";
5429 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5430 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5431 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5432 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5433 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5434 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5435 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5436 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5437 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5438 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5439 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5440 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5441 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5442 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5443 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5444 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5445 case PGMPOOLKIND_32BIT_PD:
5446 return "PGMPOOLKIND_32BIT_PD";
5447 case PGMPOOLKIND_32BIT_PD_PHYS:
5448 return "PGMPOOLKIND_32BIT_PD_PHYS";
5449 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5450 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5451 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5452 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5453 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5454 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5455 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5456 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5457 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5458 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5459 case PGMPOOLKIND_PAE_PD_PHYS:
5460 return "PGMPOOLKIND_PAE_PD_PHYS";
5461 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5462 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5463 case PGMPOOLKIND_PAE_PDPT:
5464 return "PGMPOOLKIND_PAE_PDPT";
5465 case PGMPOOLKIND_PAE_PDPT_PHYS:
5466 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5467 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5468 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5469 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5470 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5471 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5472 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5473 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5474 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5475 case PGMPOOLKIND_64BIT_PML4:
5476 return "PGMPOOLKIND_64BIT_PML4";
5477 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5478 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5479 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5480 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5481 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5482 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5483 case PGMPOOLKIND_ROOT_NESTED:
5484 return "PGMPOOLKIND_ROOT_NESTED";
5485 }
5486 return "Unknown kind!";
5487}
5488#endif /* LOG_ENABLED*/