VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 37608

Last change on this file since 37608 was 37354, checked in by vboxsync, 14 years ago

PGM: Fixed locking issues in PGMR3PhysMMIORegister and PGMR3PhysMMIODeregister. Also addressed a harmless one in PGMR3PhysRomRegister (only used at init time, so no races). Fortified the code with more lock assertions, replacing the incorrect PGMIsLocked() checks (we only care if the current thread is the lock owner). Cleaned up some ReturnStmt macros and added more of them.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 208.1 KB
Line 
1/* $Id: PGMAllPool.cpp 37354 2011-06-07 15:05:32Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hwacc_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
49DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#ifndef IN_RING3
54DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
55#endif
56#ifdef LOG_ENABLED
57static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
58#endif
59#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
60static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
61#endif
62
63int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
64PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
65void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
66void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
67
68RT_C_DECLS_END
69
70
71/**
72 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
73 *
74 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
75 * @param enmKind The page kind.
76 */
77DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
78{
79 switch (enmKind)
80 {
81 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
83 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
84 return true;
85 default:
86 return false;
87 }
88}
89
90
91/**
92 * Flushes a chain of pages sharing the same access monitor.
93 *
94 * @returns VBox status code suitable for scheduling.
95 * @param pPool The pool.
96 * @param pPage A page in the chain.
97 * @todo VBOXSTRICTRC
98 */
99int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
100{
101 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
102
103 /*
104 * Find the list head.
105 */
106 uint16_t idx = pPage->idx;
107 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
108 {
109 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
110 {
111 idx = pPage->iMonitoredPrev;
112 Assert(idx != pPage->idx);
113 pPage = &pPool->aPages[idx];
114 }
115 }
116
117 /*
118 * Iterate the list flushing each shadow page.
119 */
120 int rc = VINF_SUCCESS;
121 for (;;)
122 {
123 idx = pPage->iMonitoredNext;
124 Assert(idx != pPage->idx);
125 if (pPage->idx >= PGMPOOL_IDX_FIRST)
126 {
127 int rc2 = pgmPoolFlushPage(pPool, pPage);
128 AssertRC(rc2);
129 }
130 /* next */
131 if (idx == NIL_PGMPOOL_IDX)
132 break;
133 pPage = &pPool->aPages[idx];
134 }
135 return rc;
136}
137
138
139/**
140 * Wrapper for getting the current context pointer to the entry being modified.
141 *
142 * @returns VBox status code suitable for scheduling.
143 * @param pVM VM Handle.
144 * @param pvDst Destination address
145 * @param pvSrc Source guest virtual address.
146 * @param GCPhysSrc The source guest physical address.
147 * @param cb Size of data to read
148 */
149DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
150{
151#if defined(IN_RING3)
152 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
153 return VINF_SUCCESS;
154#else
155 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
156 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
157#endif
158}
159
160/**
161 * Process shadow entries before they are changed by the guest.
162 *
163 * For PT entries we will clear them. For PD entries, we'll simply check
164 * for mapping conflicts and set the SyncCR3 FF if found.
165 *
166 * @param pVCpu VMCPU handle
167 * @param pPool The pool.
168 * @param pPage The head page.
169 * @param GCPhysFault The guest physical fault address.
170 * @param uAddress In R0 and GC this is the guest context fault address (flat).
171 * In R3 this is the host context 'fault' address.
172 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
173 */
174void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
175{
176 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
177 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
178 PVM pVM = pPool->CTX_SUFF(pVM);
179
180 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
181
182 for (;;)
183 {
184 union
185 {
186 void *pv;
187 PX86PT pPT;
188 PPGMSHWPTPAE pPTPae;
189 PX86PD pPD;
190 PX86PDPAE pPDPae;
191 PX86PDPT pPDPT;
192 PX86PML4 pPML4;
193 } uShw;
194
195 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
196
197 uShw.pv = NULL;
198 switch (pPage->enmKind)
199 {
200 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
201 {
202 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
203 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
204 const unsigned iShw = off / sizeof(X86PTE);
205 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
206 if (uShw.pPT->a[iShw].n.u1Present)
207 {
208 X86PTE GstPte;
209
210 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
211 AssertRC(rc);
212 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
213 pgmPoolTracDerefGCPhysHint(pPool, pPage,
214 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
215 GstPte.u & X86_PTE_PG_MASK,
216 iShw);
217 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
218 }
219 break;
220 }
221
222 /* page/2 sized */
223 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
224 {
225 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
226 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
227 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
228 {
229 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
230 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
231 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
232 {
233 X86PTE GstPte;
234 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
235 AssertRC(rc);
236
237 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
238 pgmPoolTracDerefGCPhysHint(pPool, pPage,
239 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
240 GstPte.u & X86_PTE_PG_MASK,
241 iShw);
242 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
243 }
244 }
245 break;
246 }
247
248 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
249 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
250 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
251 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
252 {
253 unsigned iGst = off / sizeof(X86PDE);
254 unsigned iShwPdpt = iGst / 256;
255 unsigned iShw = (iGst % 256) * 2;
256 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
257
258 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
259 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
260 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
261 {
262 for (unsigned i = 0; i < 2; i++)
263 {
264# ifndef IN_RING0
265 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
266 {
267 Assert(pgmMapAreMappingsEnabled(pVM));
268 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
269 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
270 break;
271 }
272 else
273# endif /* !IN_RING0 */
274 if (uShw.pPDPae->a[iShw+i].n.u1Present)
275 {
276 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
277 pgmPoolFree(pVM,
278 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
279 pPage->idx,
280 iShw + i);
281 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
282 }
283
284 /* paranoia / a bit assumptive. */
285 if ( (off & 3)
286 && (off & 3) + cbWrite > 4)
287 {
288 const unsigned iShw2 = iShw + 2 + i;
289 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
290 {
291# ifndef IN_RING0
292 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
293 {
294 Assert(pgmMapAreMappingsEnabled(pVM));
295 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
296 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
297 break;
298 }
299 else
300# endif /* !IN_RING0 */
301 if (uShw.pPDPae->a[iShw2].n.u1Present)
302 {
303 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
304 pgmPoolFree(pVM,
305 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
306 pPage->idx,
307 iShw2);
308 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
309 }
310 }
311 }
312 }
313 }
314 break;
315 }
316
317 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
318 {
319 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
320 const unsigned iShw = off / sizeof(X86PTEPAE);
321 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
322 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
323 {
324 X86PTEPAE GstPte;
325 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
326 AssertRC(rc);
327
328 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
329 pgmPoolTracDerefGCPhysHint(pPool, pPage,
330 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
331 GstPte.u & X86_PTE_PAE_PG_MASK,
332 iShw);
333 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
334 }
335
336 /* paranoia / a bit assumptive. */
337 if ( (off & 7)
338 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
339 {
340 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
341 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
342
343 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
344 {
345 X86PTEPAE GstPte;
346# ifdef IN_RING3
347 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
348# else
349 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
350# endif
351 AssertRC(rc);
352 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
353 pgmPoolTracDerefGCPhysHint(pPool, pPage,
354 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
355 GstPte.u & X86_PTE_PAE_PG_MASK,
356 iShw2);
357 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
358 }
359 }
360 break;
361 }
362
363 case PGMPOOLKIND_32BIT_PD:
364 {
365 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
366 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
367
368 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
369 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
370# ifndef IN_RING0
371 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
372 {
373 Assert(pgmMapAreMappingsEnabled(pVM));
374 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
375 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
376 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
377 break;
378 }
379# endif /* !IN_RING0 */
380# ifndef IN_RING0
381 else
382# endif /* !IN_RING0 */
383 {
384 if (uShw.pPD->a[iShw].n.u1Present)
385 {
386 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
387 pgmPoolFree(pVM,
388 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
389 pPage->idx,
390 iShw);
391 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
392 }
393 }
394 /* paranoia / a bit assumptive. */
395 if ( (off & 3)
396 && (off & 3) + cbWrite > sizeof(X86PTE))
397 {
398 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
399 if ( iShw2 != iShw
400 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
401 {
402# ifndef IN_RING0
403 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
404 {
405 Assert(pgmMapAreMappingsEnabled(pVM));
406 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
407 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
408 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
409 break;
410 }
411# endif /* !IN_RING0 */
412# ifndef IN_RING0
413 else
414# endif /* !IN_RING0 */
415 {
416 if (uShw.pPD->a[iShw2].n.u1Present)
417 {
418 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
419 pgmPoolFree(pVM,
420 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
421 pPage->idx,
422 iShw2);
423 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
424 }
425 }
426 }
427 }
428#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
429 if ( uShw.pPD->a[iShw].n.u1Present
430 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
431 {
432 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
433# ifdef IN_RC /* TLB load - we're pushing things a bit... */
434 ASMProbeReadByte(pvAddress);
435# endif
436 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
437 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
438 }
439#endif
440 break;
441 }
442
443 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
444 {
445 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
446 const unsigned iShw = off / sizeof(X86PDEPAE);
447 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
448#ifndef IN_RING0
449 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
450 {
451 Assert(pgmMapAreMappingsEnabled(pVM));
452 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
453 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
454 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
455 break;
456 }
457#endif /* !IN_RING0 */
458 /*
459 * Causes trouble when the guest uses a PDE to refer to the whole page table level
460 * structure. (Invalidate here; faults later on when it tries to change the page
461 * table entries -> recheck; probably only applies to the RC case.)
462 */
463# ifndef IN_RING0
464 else
465# endif /* !IN_RING0 */
466 {
467 if (uShw.pPDPae->a[iShw].n.u1Present)
468 {
469 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
470 pgmPoolFree(pVM,
471 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
472 pPage->idx,
473 iShw);
474 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
475 }
476 }
477 /* paranoia / a bit assumptive. */
478 if ( (off & 7)
479 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
480 {
481 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
482 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
483
484#ifndef IN_RING0
485 if ( iShw2 != iShw
486 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
487 {
488 Assert(pgmMapAreMappingsEnabled(pVM));
489 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
490 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
491 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
492 break;
493 }
494#endif /* !IN_RING0 */
495# ifndef IN_RING0
496 else
497# endif /* !IN_RING0 */
498 if (uShw.pPDPae->a[iShw2].n.u1Present)
499 {
500 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
501 pgmPoolFree(pVM,
502 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
503 pPage->idx,
504 iShw2);
505 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
506 }
507 }
508 break;
509 }
510
511 case PGMPOOLKIND_PAE_PDPT:
512 {
513 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
514 /*
515 * Hopefully this doesn't happen very often:
516 * - touching unused parts of the page
517 * - messing with the bits of pd pointers without changing the physical address
518 */
519 /* PDPT roots are not page aligned; 32 byte only! */
520 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
521
522 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
523 const unsigned iShw = offPdpt / sizeof(X86PDPE);
524 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
525 {
526# ifndef IN_RING0
527 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
528 {
529 Assert(pgmMapAreMappingsEnabled(pVM));
530 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
531 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
532 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
533 break;
534 }
535# endif /* !IN_RING0 */
536# ifndef IN_RING0
537 else
538# endif /* !IN_RING0 */
539 if (uShw.pPDPT->a[iShw].n.u1Present)
540 {
541 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
542 pgmPoolFree(pVM,
543 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
544 pPage->idx,
545 iShw);
546 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
547 }
548
549 /* paranoia / a bit assumptive. */
550 if ( (offPdpt & 7)
551 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
552 {
553 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
554 if ( iShw2 != iShw
555 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
556 {
557# ifndef IN_RING0
558 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
559 {
560 Assert(pgmMapAreMappingsEnabled(pVM));
561 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
562 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
563 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
564 break;
565 }
566# endif /* !IN_RING0 */
567# ifndef IN_RING0
568 else
569# endif /* !IN_RING0 */
570 if (uShw.pPDPT->a[iShw2].n.u1Present)
571 {
572 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
573 pgmPoolFree(pVM,
574 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
575 pPage->idx,
576 iShw2);
577 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
578 }
579 }
580 }
581 }
582 break;
583 }
584
585#ifndef IN_RC
586 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
587 {
588 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
589 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
590 const unsigned iShw = off / sizeof(X86PDEPAE);
591 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
592 if (uShw.pPDPae->a[iShw].n.u1Present)
593 {
594 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
595 pgmPoolFree(pVM,
596 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
597 pPage->idx,
598 iShw);
599 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
600 }
601 /* paranoia / a bit assumptive. */
602 if ( (off & 7)
603 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
604 {
605 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
606 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
607
608 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
609 if (uShw.pPDPae->a[iShw2].n.u1Present)
610 {
611 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
612 pgmPoolFree(pVM,
613 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
614 pPage->idx,
615 iShw2);
616 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
617 }
618 }
619 break;
620 }
621
622 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
623 {
624 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
625 /*
626 * Hopefully this doesn't happen very often:
627 * - messing with the bits of pd pointers without changing the physical address
628 */
629 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
630 const unsigned iShw = off / sizeof(X86PDPE);
631 if (uShw.pPDPT->a[iShw].n.u1Present)
632 {
633 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
634 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
635 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
636 }
637 /* paranoia / a bit assumptive. */
638 if ( (off & 7)
639 && (off & 7) + cbWrite > sizeof(X86PDPE))
640 {
641 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
642 if (uShw.pPDPT->a[iShw2].n.u1Present)
643 {
644 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
645 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
646 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
647 }
648 }
649 break;
650 }
651
652 case PGMPOOLKIND_64BIT_PML4:
653 {
654 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
655 /*
656 * Hopefully this doesn't happen very often:
657 * - messing with the bits of pd pointers without changing the physical address
658 */
659 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
660 const unsigned iShw = off / sizeof(X86PDPE);
661 if (uShw.pPML4->a[iShw].n.u1Present)
662 {
663 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
664 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
665 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
666 }
667 /* paranoia / a bit assumptive. */
668 if ( (off & 7)
669 && (off & 7) + cbWrite > sizeof(X86PDPE))
670 {
671 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
672 if (uShw.pPML4->a[iShw2].n.u1Present)
673 {
674 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
675 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
676 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
677 }
678 }
679 break;
680 }
681#endif /* IN_RING0 */
682
683 default:
684 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
685 }
686 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
687
688 /* next */
689 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
690 return;
691 pPage = &pPool->aPages[pPage->iMonitoredNext];
692 }
693}
694
695# ifndef IN_RING3
696/**
697 * Checks if a access could be a fork operation in progress.
698 *
699 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
700 *
701 * @returns true if it's likely that we're forking, otherwise false.
702 * @param pPool The pool.
703 * @param pDis The disassembled instruction.
704 * @param offFault The access offset.
705 */
706DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
707{
708 /*
709 * i386 linux is using btr to clear X86_PTE_RW.
710 * The functions involved are (2.6.16 source inspection):
711 * clear_bit
712 * ptep_set_wrprotect
713 * copy_one_pte
714 * copy_pte_range
715 * copy_pmd_range
716 * copy_pud_range
717 * copy_page_range
718 * dup_mmap
719 * dup_mm
720 * copy_mm
721 * copy_process
722 * do_fork
723 */
724 if ( pDis->pCurInstr->opcode == OP_BTR
725 && !(offFault & 4)
726 /** @todo Validate that the bit index is X86_PTE_RW. */
727 )
728 {
729 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
730 return true;
731 }
732 return false;
733}
734
735
736/**
737 * Determine whether the page is likely to have been reused.
738 *
739 * @returns true if we consider the page as being reused for a different purpose.
740 * @returns false if we consider it to still be a paging page.
741 * @param pVM VM Handle.
742 * @param pVCpu VMCPU Handle.
743 * @param pRegFrame Trap register frame.
744 * @param pDis The disassembly info for the faulting instruction.
745 * @param pvFault The fault address.
746 *
747 * @remark The REP prefix check is left to the caller because of STOSD/W.
748 */
749DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
750{
751#ifndef IN_RC
752 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
753 if ( HWACCMHasPendingIrq(pVM)
754 && (pRegFrame->rsp - pvFault) < 32)
755 {
756 /* Fault caused by stack writes while trying to inject an interrupt event. */
757 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
758 return true;
759 }
760#else
761 NOREF(pVM); NOREF(pvFault);
762#endif
763
764 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
765
766 /* Non-supervisor mode write means it's used for something else. */
767 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
768 return true;
769
770 switch (pDis->pCurInstr->opcode)
771 {
772 /* call implies the actual push of the return address faulted */
773 case OP_CALL:
774 Log4(("pgmPoolMonitorIsReused: CALL\n"));
775 return true;
776 case OP_PUSH:
777 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
778 return true;
779 case OP_PUSHF:
780 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
781 return true;
782 case OP_PUSHA:
783 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
784 return true;
785 case OP_FXSAVE:
786 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
787 return true;
788 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
789 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
790 return true;
791 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
792 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
793 return true;
794 case OP_MOVSWD:
795 case OP_STOSWD:
796 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
797 && pRegFrame->rcx >= 0x40
798 )
799 {
800 Assert(pDis->mode == CPUMODE_64BIT);
801
802 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
803 return true;
804 }
805 return false;
806 }
807 if ( ( (pDis->param1.flags & USE_REG_GEN32)
808 || (pDis->param1.flags & USE_REG_GEN64))
809 && (pDis->param1.base.reg_gen == USE_REG_ESP))
810 {
811 Log4(("pgmPoolMonitorIsReused: ESP\n"));
812 return true;
813 }
814
815 return false;
816}
817
818/**
819 * Flushes the page being accessed.
820 *
821 * @returns VBox status code suitable for scheduling.
822 * @param pVM The VM handle.
823 * @param pVCpu The VMCPU handle.
824 * @param pPool The pool.
825 * @param pPage The pool page (head).
826 * @param pDis The disassembly of the write instruction.
827 * @param pRegFrame The trap register frame.
828 * @param GCPhysFault The fault address as guest physical address.
829 * @param pvFault The fault address.
830 * @todo VBOXSTRICTRC
831 */
832static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
833 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
834{
835 /*
836 * First, do the flushing.
837 */
838 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
839
840 /*
841 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
842 * Must do this in raw mode (!); XP boot will fail otherwise.
843 */
844 uint32_t cbWritten;
845 VBOXSTRICTRC rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL, &cbWritten);
846 if (RT_SUCCESS(rc2))
847 {
848 pRegFrame->rip += pDis->opsize;
849 AssertMsg(rc2 == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
850 }
851 else if (rc2 == VERR_EM_INTERPRETER)
852 {
853#ifdef IN_RC
854 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
855 {
856 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
857 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
858 rc = VINF_SUCCESS;
859 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
860 }
861 else
862#endif
863 {
864 rc = VINF_EM_RAW_EMULATE_INSTR;
865 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
866 }
867 }
868 else
869 rc = VBOXSTRICTRC_VAL(rc2);
870
871 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
872 return rc;
873}
874
875/**
876 * Handles the STOSD write accesses.
877 *
878 * @returns VBox status code suitable for scheduling.
879 * @param pVM The VM handle.
880 * @param pPool The pool.
881 * @param pPage The pool page (head).
882 * @param pDis The disassembly of the write instruction.
883 * @param pRegFrame The trap register frame.
884 * @param GCPhysFault The fault address as guest physical address.
885 * @param pvFault The fault address.
886 */
887DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
888 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
889{
890 unsigned uIncrement = pDis->param1.size;
891
892 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
893 Assert(pRegFrame->rcx <= 0x20);
894
895#ifdef VBOX_STRICT
896 if (pDis->opmode == CPUMODE_32BIT)
897 Assert(uIncrement == 4);
898 else
899 Assert(uIncrement == 8);
900#endif
901
902 Log3(("pgmPoolAccessHandlerSTOSD\n"));
903
904 /*
905 * Increment the modification counter and insert it into the list
906 * of modified pages the first time.
907 */
908 if (!pPage->cModifications++)
909 pgmPoolMonitorModifiedInsert(pPool, pPage);
910
911 /*
912 * Execute REP STOSD.
913 *
914 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
915 * write situation, meaning that it's safe to write here.
916 */
917 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
918 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
919 while (pRegFrame->rcx)
920 {
921#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
922 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
923 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
924 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
925#else
926 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
927#endif
928#ifdef IN_RC
929 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
930#else
931 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
932#endif
933 pu32 += uIncrement;
934 GCPhysFault += uIncrement;
935 pRegFrame->rdi += uIncrement;
936 pRegFrame->rcx--;
937 }
938 pRegFrame->rip += pDis->opsize;
939
940 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
941 return VINF_SUCCESS;
942}
943
944
945/**
946 * Handles the simple write accesses.
947 *
948 * @returns VBox status code suitable for scheduling.
949 * @param pVM The VM handle.
950 * @param pVCpu The VMCPU handle.
951 * @param pPool The pool.
952 * @param pPage The pool page (head).
953 * @param pDis The disassembly of the write instruction.
954 * @param pRegFrame The trap register frame.
955 * @param GCPhysFault The fault address as guest physical address.
956 * @param pvFault The fault address.
957 * @param pfReused Reused state (out)
958 */
959DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
960 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
961{
962 Log3(("pgmPoolAccessHandlerSimple\n"));
963 /*
964 * Increment the modification counter and insert it into the list
965 * of modified pages the first time.
966 */
967 if (!pPage->cModifications++)
968 pgmPoolMonitorModifiedInsert(pPool, pPage);
969
970 /*
971 * Clear all the pages. ASSUMES that pvFault is readable.
972 */
973#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
974 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
975 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
976 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
977#else
978 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
979#endif
980
981 /*
982 * Interpret the instruction.
983 */
984 uint32_t cb;
985 VBOXSTRICTRC rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL, &cb);
986 if (RT_SUCCESS(rc))
987 {
988 pRegFrame->rip += pDis->opsize;
989 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
990 }
991 else if (rc == VERR_EM_INTERPRETER)
992 {
993 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
994 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
995 rc = VINF_EM_RAW_EMULATE_INSTR;
996 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
997 }
998
999#if 0 /* experimental code */
1000 if (rc == VINF_SUCCESS)
1001 {
1002 switch (pPage->enmKind)
1003 {
1004 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1005 {
1006 X86PTEPAE GstPte;
1007 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1008 AssertRC(rc);
1009
1010 /* Check the new value written by the guest. If present and with a bogus physical address, then
1011 * it's fairly safe to assume the guest is reusing the PT.
1012 */
1013 if (GstPte.n.u1Present)
1014 {
1015 RTHCPHYS HCPhys = -1;
1016 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1017 if (rc != VINF_SUCCESS)
1018 {
1019 *pfReused = true;
1020 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1021 }
1022 }
1023 break;
1024 }
1025 }
1026 }
1027#endif
1028
1029 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", VBOXSTRICTRC_VAL(rc), cb));
1030 return VBOXSTRICTRC_VAL(rc);
1031}
1032
1033/**
1034 * \#PF Handler callback for PT write accesses.
 *
 * Takes the PGM lock and then decides how to deal with a write to a monitored
 * pool page: interpret a single non-REP instruction
 * (pgmPoolAccessHandlerSimple), emulate a small REP STOS
 * (pgmPoolAccessHandlerSTOSD), hand the page to the dirty page tracking
 * (only with PGMPOOL_WITH_OPTIMIZED_DIRTY_PT in ring-0), or flush the pool
 * page (pgmPoolAccessHandlerFlush).  The PGM lock is released again on every
 * return path.
1035 *
1036 * @returns VBox status code (appropriate for GC return).
1037 * @param pVM VM Handle.
1038 * @param uErrorCode CPU Error code.
1039 * @param pRegFrame Trap register frame.
1040 * NULL on DMA and other non CPU access.
 * NOTE(review): the body dereferences pRegFrame
 * unconditionally (e.g. pRegFrame->rip), so the NULL
 * case does not look supported here - confirm with
 * the callers.
1041 * @param pvFault The fault address (cr2).
1042 * @param GCPhysFault The GC physical address corresponding to pvFault.
1043 * @param pvUser User argument.
 * Cast to PPGMPOOLPAGE; asserted below to be the head of
 * its monitoring chain.
1044 */
1045DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1046{
1047 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1048 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1049 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1050 PVMCPU pVCpu = VMMGetCpu(pVM);
1051 unsigned cMaxModifications;
1052 bool fForcedFlush = false;
1053
1054 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1055
1056 pgmLock(pVM);
1057 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1058 {
1059 /* Pool page changed while we were waiting for the lock; ignore. */
1060 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1061 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1062 pgmUnlock(pVM);
1063 return VINF_SUCCESS;
1064 }
1065#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1066 if (pPage->fDirty)
1067 {
     /* NOTE(review): unlike the early return above, this path does not stop the StatMonitor profile - confirm that is intended. */
1068 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1069 pgmUnlock(pVM);
1070 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1071 }
1072#endif
1073
1074#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1075 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1076 {
1077 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1078 void *pvGst;
1079 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1080 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1081 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1082 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1083 }
1084#endif
1085
1086 /*
1087 * Disassemble the faulting instruction.
1088 */
1089 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1090 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1091 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1092 {
1093 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1094 pgmUnlock(pVM);
1095 return rc;
1096 }
1097
1098 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1099
1100 /*
1101 * We should ALWAYS have the list head as user parameter. This
1102 * is because we use that page to record the changes.
1103 */
1104 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1105
1106#ifdef IN_RING0
1107 /* Maximum nr of modifications depends on the page type. */
1108 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1109 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1110 cMaxModifications = 4;
1111 else
1112 cMaxModifications = 24;
1113#else
1114 cMaxModifications = 48;
1115#endif
1116
1117 /*
1118 * Incremental page table updates should weigh more than random ones.
1119 * (Only applies when started from offset 0)
1120 */
     /*
      * A write counts as incremental when it comes from a RIP within 0x40
      * bytes of the recorded one, hits the address directly following the
      * recorded fault address (by one operand size), and this is the handler
      * invocation right after the recorded one on this VCPU.  Such a write
      * doubles cModifications, and reaching cMaxModifications this way sets
      * fForcedFlush (which rules out the dirty page path further down).
      */
1121 pVCpu->pgm.s.cPoolAccessHandler++;
1122 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1123 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1124 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1125 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1126 {
1127 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1128 Assert(pPage->cModifications < 32000);
1129 pPage->cModifications = pPage->cModifications * 2;
1130 pPage->pvLastAccessHandlerFault = pvFault;
1131 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1132 if (pPage->cModifications >= cMaxModifications)
1133 {
1134 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1135 fForcedFlush = true;
1136 }
1137 }
1138
1139 if (pPage->cModifications >= cMaxModifications)
1140 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1141
1142 /*
1143 * Check if it's worth dealing with.
1144 */
1145 bool fReused = false;
1146 bool fNotReusedNotForking = false;
1147 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1148 || pgmPoolIsPageLocked(pPage)
1149 )
1150 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1151 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1152 {
1153 /*
1154 * Simple instructions, no REP prefix.
1155 */
1156 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1157 {
1158 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1159 if (fReused)
1160 goto flushPage;
1161
1162 /* A mov instruction to change the first page table entry will be remembered so we can detect
1163 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1164 */
1165 if ( rc == VINF_SUCCESS
1166 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1167 && pDis->pCurInstr->opcode == OP_MOV
1168 && (pvFault & PAGE_OFFSET_MASK) == 0)
1169 {
1170 pPage->pvLastAccessHandlerFault = pvFault;
1171 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1172 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1173 /* Make sure we don't kick out a page too quickly. */
1174 if (pPage->cModifications > 8)
1175 pPage->cModifications = 2;
1176 }
1177 else
1178 if (pPage->pvLastAccessHandlerFault == pvFault)
1179 {
1180 /* ignore the 2nd write to this page table entry. */
1181 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1182 }
1183 else
1184 {
1185 pPage->pvLastAccessHandlerFault = 0;
1186 pPage->pvLastAccessHandlerRip = 0;
1187 }
1188
1189 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1190 pgmUnlock(pVM);
1191 return rc;
1192 }
1193
1194 /*
1195 * Windows is frequently doing small memset() operations (netio test 4k+).
1196 * We have to deal with these or we'll kill the cache and performance.
1197 */
     /*
      * Only a forward (DF clear) REP STOS with default operand and address
      * size qualifies, and only if it stores at most 0x20 naturally aligned
      * elements that stay within the faulting page.
      */
1198 if ( pDis->pCurInstr->opcode == OP_STOSWD
1199 && !pRegFrame->eflags.Bits.u1DF
1200 && pDis->opmode == pDis->mode
1201 && pDis->addrmode == pDis->mode)
1202 {
1203 bool fValidStosd = false;
1204
1205 if ( pDis->mode == CPUMODE_32BIT
1206 && pDis->prefix == PREFIX_REP
1207 && pRegFrame->ecx <= 0x20
1208 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1209 && !((uintptr_t)pvFault & 3)
1210 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1211 )
1212 {
1213 fValidStosd = true;
1214 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1215 }
1216 else
1217 if ( pDis->mode == CPUMODE_64BIT
1218 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1219 && pRegFrame->rcx <= 0x20
1220 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1221 && !((uintptr_t)pvFault & 7)
1222 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1223 )
1224 {
1225 fValidStosd = true;
1226 }
1227
1228 if (fValidStosd)
1229 {
1230 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1231 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1232 pgmUnlock(pVM);
1233 return rc;
1234 }
1235 }
1236
1237 /* REP prefix, don't bother. */
1238 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1239 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1240 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
     /* Remember that the reuse/forking checks already came out negative, so the dirty page code below need not repeat them. */
1241 fNotReusedNotForking = true;
1242 }
1243
1244#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1245 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1246 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1247 */
1248 if ( pPage->cModifications >= cMaxModifications
1249 && !fForcedFlush
1250 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1251 && ( fNotReusedNotForking
1252 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1253 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1254 )
1255 )
1256 {
1257 Assert(!pgmPoolIsPageLocked(pPage));
1258 Assert(pPage->fDirty == false);
1259
1260 /* Flush any monitored duplicates as we will disable write protection. */
1261 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1262 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1263 {
1264 PPGMPOOLPAGE pPageHead = pPage;
1265
1266 /* Find the monitor head. */
1267 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1268 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1269
1270 while (pPageHead)
1271 {
     /* Fetch the successor before pgmPoolFlushPage() gets to touch this entry. */
1272 unsigned idxNext = pPageHead->iMonitoredNext;
1273
1274 if (pPageHead != pPage)
1275 {
1276 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1277 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1278 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1279 AssertRC(rc2);
1280 }
1281
1282 if (idxNext == NIL_PGMPOOL_IDX)
1283 break;
1284
1285 pPageHead = &pPool->aPages[idxNext];
1286 }
1287 }
1288
1289 /* The flushing above might fail for locked pages, so double check. */
1290 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1291 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1292 {
1293 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1294
1295 /* Temporarily allow write access to the page table again. */
1296 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1297 if (rc == VINF_SUCCESS)
1298 {
1299 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1300 AssertMsg(rc == VINF_SUCCESS
1301 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1302 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1303 || rc == VERR_PAGE_NOT_PRESENT,
1304 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1305
1306 pPage->pvDirtyFault = pvFault;
1307
1308 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1309 pgmUnlock(pVM);
1310 return rc;
1311 }
     /* If the handler could not be switched off we fall through to the flush code below. */
1312 }
1313 }
1314#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1315
     /* Only counted when we get here by falling through; the reused case jumps straight to flushPage. */
1316 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1317flushPage:
1318 /*
1319 * Not worth it, so flush it.
1320 *
1321 * If we considered it to be reused, don't go back to ring-3
1322 * to emulate failed instructions since we usually cannot
1323 * interpret then. This may be a bit risky, in which case
1324 * the reuse detection must be fixed.
1325 */
1326 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1327 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1328 && fReused)
1329 {
1330 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1331 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1332 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1333 }
1334 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1335 pgmUnlock(pVM);
1336 return rc;
1337}
1338
1339# endif /* !IN_RING3 */
1340
1341# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1342
1343# ifdef VBOX_STRICT
1344/**
1345 * Check references to guest physical memory in a PAE / PAE page table.
1346 *
1347 * @param pPool The pool.
1348 * @param pPage The page.
1349 * @param pShwPT The shadow page table (mapping of the page).
1350 * @param pGstPT The guest page table.
1351 */
1352static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1353{
1354 unsigned cErrors = 0;
1355 int LastRc = -1; /* initialized to shut up gcc */
1356 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1357 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1358 PVM pVM = pPool->CTX_SUFF(pVM);
1359
1360#ifdef VBOX_STRICT
1361 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1362 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1363#endif
1364 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1365 {
1366 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1367 {
1368 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1369 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1370 if ( rc != VINF_SUCCESS
1371 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1372 {
1373 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1374 LastPTE = i;
1375 LastRc = rc;
1376 LastHCPhys = HCPhys;
1377 cErrors++;
1378
1379 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1380 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1381 AssertRC(rc);
1382
1383 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1384 {
1385 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1386
1387 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1388 {
1389 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1390
1391 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1392 {
1393 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1394 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1395 {
1396 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1397 }
1398 }
1399
1400 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1401 }
1402 }
1403 }
1404 }
1405 }
1406 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1407}
1408
1409/**
1410 * Check references to guest physical memory in a PAE / 32-bit page table.
1411 *
1412 * @param pPool The pool.
1413 * @param pPage The page.
1414 * @param pShwPT The shadow page table (mapping of the page).
1415 * @param pGstPT The guest page table.
1416 */
1417static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1418{
1419 unsigned cErrors = 0;
1420 int LastRc = -1; /* initialized to shut up gcc */
1421 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1422 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1423 PVM pVM = pPool->CTX_SUFF(pVM);
1424
1425#ifdef VBOX_STRICT
1426 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1427 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1428#endif
1429 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1430 {
1431 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1432 {
1433 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1434 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1435 if ( rc != VINF_SUCCESS
1436 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1437 {
1438 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1439 LastPTE = i;
1440 LastRc = rc;
1441 LastHCPhys = HCPhys;
1442 cErrors++;
1443
1444 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1445 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1446 AssertRC(rc);
1447
1448 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1449 {
1450 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1451
1452 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1453 {
1454 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1455
1456 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1457 {
1458 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1459 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1460 {
1461 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1462 }
1463 }
1464
1465 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1466 }
1467 }
1468 }
1469 }
1470 }
1471 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1472}
1473
1474# endif /* VBOX_STRICT */
1475
1476/**
1477 * Clear references to guest physical memory in a PAE / PAE page table.
1478 *
1479 * @returns nr of changed PTEs
1480 * @param pPool The pool.
1481 * @param pPage The page.
1482 * @param pShwPT The shadow page table (mapping of the page).
1483 * @param pGstPT The guest page table.
1484 * @param pOldGstPT The old cached guest page table.
1485 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1486 * @param pfFlush Flush reused page table (out)
1487 */
1488DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1489 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1490{
1491 unsigned cChanged = 0;
1492
1493#ifdef VBOX_STRICT
1494 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1495 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1496#endif
1497 *pfFlush = false;
1498
1499 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1500 {
1501 /* Check the new value written by the guest. If present and with a bogus physical address, then
1502 * it's fairly safe to assume the guest is reusing the PT.
1503 */
1504 if ( fAllowRemoval
1505 && pGstPT->a[i].n.u1Present)
1506 {
1507 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1508 {
1509 *pfFlush = true;
1510 return ++cChanged;
1511 }
1512 }
1513 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1514 {
1515 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1516 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1517 {
1518#ifdef VBOX_STRICT
1519 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1520 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1521 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1522#endif
1523 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1524 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1525 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1526 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1527
1528 if ( uHostAttr == uGuestAttr
1529 && fHostRW <= fGuestRW)
1530 continue;
1531 }
1532 cChanged++;
1533 /* Something was changed, so flush it. */
1534 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1535 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1536 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1537 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1538 }
1539 }
1540 return cChanged;
1541}
1542
1543/**
1544 * Clear references to guest physical memory in a PAE / PAE page table.
1545 *
1546 * @returns nr of changed PTEs
1547 * @param pPool The pool.
1548 * @param pPage The page.
1549 * @param pShwPT The shadow page table (mapping of the page).
1550 * @param pGstPT The guest page table.
1551 * @param pOldGstPT The old cached guest page table.
1552 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1553 * @param pfFlush Flush reused page table (out)
1554 */
1555DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1556 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1557{
1558 unsigned cChanged = 0;
1559
1560#ifdef VBOX_STRICT
1561 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1562 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1563#endif
1564 *pfFlush = false;
1565
1566 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1567 {
1568 /* Check the new value written by the guest. If present and with a bogus physical address, then
1569 * it's fairly safe to assume the guest is reusing the PT.
1570 */
1571 if ( fAllowRemoval
1572 && pGstPT->a[i].n.u1Present)
1573 {
1574 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1575 {
1576 *pfFlush = true;
1577 return ++cChanged;
1578 }
1579 }
1580 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1581 {
1582 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1583 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1584 {
1585#ifdef VBOX_STRICT
1586 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1587 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1588 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1589#endif
1590 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1591 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1592 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1593 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1594
1595 if ( uHostAttr == uGuestAttr
1596 && fHostRW <= fGuestRW)
1597 continue;
1598 }
1599 cChanged++;
1600 /* Something was changed, so flush it. */
1601 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1602 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1603 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1604 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1605 }
1606 }
1607 return cChanged;
1608}
1609
1610/**
1611 * Flush a dirty page
1612 *
1613 * @param pVM VM Handle.
1614 * @param pPool The pool.
1615 * @param idxSlot Dirty array slot index
1616 * @param fAllowRemoval Allow a reused page table to be removed
1617 */
1618static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1619{
1620 PPGMPOOLPAGE pPage;
1621 unsigned idxPage;
1622
1623 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1624 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1625 return;
1626
1627 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1628 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1629 pPage = &pPool->aPages[idxPage];
1630 Assert(pPage->idx == idxPage);
1631 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1632
1633 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1634 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1635
1636#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1637 PVMCPU pVCpu = VMMGetCpu(pVM);
1638 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1639#endif
1640
1641 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1642 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1643 Assert(rc == VINF_SUCCESS);
1644 pPage->fDirty = false;
1645
1646#ifdef VBOX_STRICT
1647 uint64_t fFlags = 0;
1648 RTHCPHYS HCPhys;
1649 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1650 AssertMsg( ( rc == VINF_SUCCESS
1651 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1652 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1653 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1654 || rc == VERR_PAGE_NOT_PRESENT,
1655 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1656#endif
1657
1658 /* Flush those PTEs that have changed. */
1659 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1660 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1661 void *pvGst;
1662 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1663 bool fFlush;
1664 unsigned cChanges;
1665
1666 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1667 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1668 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1669 else
1670 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1671 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1672
1673 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1674 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1675 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1676 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1677
1678 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1679 Assert(pPage->cModifications);
1680 if (cChanges < 4)
1681 pPage->cModifications = 1; /* must use > 0 here */
1682 else
1683 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1684
1685 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1686 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1687 pPool->idxFreeDirtyPage = idxSlot;
1688
1689 pPool->cDirtyPages--;
1690 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1691 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1692 if (fFlush)
1693 {
1694 Assert(fAllowRemoval);
1695 Log(("Flush reused page table!\n"));
1696 pgmPoolFlushPage(pPool, pPage);
1697 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1698 }
1699 else
1700 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1701
1702#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1703 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1704#endif
1705}
1706
1707# ifndef IN_RING3
1708/**
1709 * Add a new dirty page
1710 *
1711 * @param pVM VM Handle.
1712 * @param pPool The pool.
1713 * @param pPage The page.
1714 */
1715void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1716{
1717 unsigned idxFree;
1718
1719 PGM_LOCK_ASSERT_OWNER(pVM);
1720 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1721 Assert(!pPage->fDirty);
1722
1723 idxFree = pPool->idxFreeDirtyPage;
1724 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1725 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1726
1727 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1728 {
1729 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1730 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1731 }
1732 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1733 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1734
1735 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1736
1737 /*
1738 * Make a copy of the guest page table as we require valid GCPhys addresses
1739 * when removing references to physical pages.
1740 * (The HCPhys linear lookup is *extremely* expensive!)
1741 */
1742 void *pvGst;
1743 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1744 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1745#ifdef VBOX_STRICT
1746 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1747 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1748 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1749 else
1750 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1751 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1752#endif
1753 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1754
1755 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1756 pPage->fDirty = true;
1757 pPage->idxDirty = idxFree;
1758 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1759 pPool->cDirtyPages++;
1760
1761 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1762 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1763 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1764 {
1765 unsigned i;
1766 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1767 {
1768 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1769 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1770 {
1771 pPool->idxFreeDirtyPage = idxFree;
1772 break;
1773 }
1774 }
1775 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1776 }
1777
1778 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1779 return;
1780}
1781# endif /* !IN_RING3 */
1782
1783/**
1784 * Check if the specified page is dirty (not write monitored)
1785 *
1786 * @return dirty or not
1787 * @param pVM VM Handle.
1788 * @param GCPhys Guest physical address
1789 */
1790bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1791{
1792 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1793 PGM_LOCK_ASSERT_OWNER(pVM);
1794 if (!pPool->cDirtyPages)
1795 return false;
1796
1797 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1798
1799 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1800 {
1801 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1802 {
1803 PPGMPOOLPAGE pPage;
1804 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1805
1806 pPage = &pPool->aPages[idxPage];
1807 if (pPage->GCPhys == GCPhys)
1808 return true;
1809 }
1810 }
1811 return false;
1812}
1813
1814/**
1815 * Reset all dirty pages by reinstating page monitoring.
1816 *
1817 * @param pVM VM Handle.
1818 */
1819void pgmPoolResetDirtyPages(PVM pVM)
1820{
1821 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1822 PGM_LOCK_ASSERT_OWNER(pVM);
1823 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1824
1825 if (!pPool->cDirtyPages)
1826 return;
1827
1828 Log(("pgmPoolResetDirtyPages\n"));
1829 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1830 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1831
1832 pPool->idxFreeDirtyPage = 0;
1833 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1834 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1835 {
1836 unsigned i;
1837 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1838 {
1839 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1840 {
1841 pPool->idxFreeDirtyPage = i;
1842 break;
1843 }
1844 }
1845 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1846 }
1847
1848 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1849 return;
1850}
1851
1852/**
1853 * Invalidate the PT entry for the specified page
1854 *
1855 * @param pVM VM Handle.
1856 * @param GCPtrPage Guest page to invalidate
1857 */
1858void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1859{
1860 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1861 PGM_LOCK_ASSERT_OWNER(pVM);
1862 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1863
1864 if (!pPool->cDirtyPages)
1865 return;
1866
1867 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
1868 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1869 {
1870 }
1871}
1872
1873/**
1874 * Reset all dirty pages by reinstating page monitoring.
1875 *
1876 * @param pVM VM Handle.
1877 * @param GCPhysPT Physical address of the page table
1878 */
1879void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1880{
1881 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1882 PGM_LOCK_ASSERT_OWNER(pVM);
1883 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1884 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1885
1886 if (!pPool->cDirtyPages)
1887 return;
1888
1889 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1890
1891 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1892 {
1893 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1894 {
1895 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1896
1897 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1898 if (pPage->GCPhys == GCPhysPT)
1899 {
1900 idxDirtyPage = i;
1901 break;
1902 }
1903 }
1904 }
1905
1906 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1907 {
1908 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1909 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1910 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1911 {
1912 unsigned i;
1913 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1914 {
1915 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1916 {
1917 pPool->idxFreeDirtyPage = i;
1918 break;
1919 }
1920 }
1921 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1922 }
1923 }
1924}
1925
1926# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1927
1928/**
1929 * Inserts a page into the GCPhys hash table.
1930 *
1931 * @param pPool The pool.
1932 * @param pPage The page.
1933 */
1934DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1935{
1936 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1937 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1938 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1939 pPage->iNext = pPool->aiHash[iHash];
1940 pPool->aiHash[iHash] = pPage->idx;
1941}
1942
1943
1944/**
1945 * Removes a page from the GCPhys hash table.
1946 *
1947 * @param pPool The pool.
1948 * @param pPage The page.
1949 */
1950DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1951{
1952 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1953 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1954 if (pPool->aiHash[iHash] == pPage->idx)
1955 pPool->aiHash[iHash] = pPage->iNext;
1956 else
1957 {
1958 uint16_t iPrev = pPool->aiHash[iHash];
1959 for (;;)
1960 {
1961 const int16_t i = pPool->aPages[iPrev].iNext;
1962 if (i == pPage->idx)
1963 {
1964 pPool->aPages[iPrev].iNext = pPage->iNext;
1965 break;
1966 }
1967 if (i == NIL_PGMPOOL_IDX)
1968 {
1969 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1970 break;
1971 }
1972 iPrev = i;
1973 }
1974 }
1975 pPage->iNext = NIL_PGMPOOL_IDX;
1976}
1977
1978
1979/**
1980 * Frees up one cache page.
1981 *
1982 * @returns VBox status code.
1983 * @retval VINF_SUCCESS on success.
1984 * @param pPool The pool.
1985 * @param iUser The user index.
1986 */
1987static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1988{
1989#ifndef IN_RC
1990 const PVM pVM = pPool->CTX_SUFF(pVM);
1991#endif
1992 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
1993 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1994
1995 /*
1996 * Select one page from the tail of the age list.
1997 */
1998 PPGMPOOLPAGE pPage;
1999 for (unsigned iLoop = 0; ; iLoop++)
2000 {
2001 uint16_t iToFree = pPool->iAgeTail;
2002 if (iToFree == iUser)
2003 iToFree = pPool->aPages[iToFree].iAgePrev;
2004/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2005 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2006 {
2007 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2008 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2009 {
2010 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2011 continue;
2012 iToFree = i;
2013 break;
2014 }
2015 }
2016*/
2017 Assert(iToFree != iUser);
2018 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2019 pPage = &pPool->aPages[iToFree];
2020
2021 /*
2022 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2023 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2024 */
2025 if (!pgmPoolIsPageLocked(pPage))
2026 break;
2027 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2028 pgmPoolCacheUsed(pPool, pPage);
2029 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
2030 }
2031
2032 /*
2033 * Found a usable page, flush it and return.
2034 */
2035 int rc = pgmPoolFlushPage(pPool, pPage);
2036 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2037 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2038 if (rc == VINF_SUCCESS)
2039 PGM_INVL_ALL_VCPU_TLBS(pVM);
2040 return rc;
2041}
2042
2043
2044/**
2045 * Checks if a kind mismatch is really a page being reused
2046 * or if it's just normal remappings.
2047 *
2048 * @returns true if reused and the cached page (enmKind1) should be flushed
2049 * @returns false if not reused.
2050 * @param enmKind1 The kind of the cached page.
2051 * @param enmKind2 The kind of the requested page.
2052 */
2053static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2054{
2055 switch (enmKind1)
2056 {
2057 /*
2058 * Never reuse them. There is no remapping in non-paging mode.
2059 */
2060 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2061 case PGMPOOLKIND_32BIT_PD_PHYS:
2062 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2063 case PGMPOOLKIND_PAE_PD_PHYS:
2064 case PGMPOOLKIND_PAE_PDPT_PHYS:
2065 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2066 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2067 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2068 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2069 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2070 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2071 return false;
2072
2073 /*
2074 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2075 */
2076 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2077 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2078 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2079 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2080 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2081 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2082 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2083 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2084 case PGMPOOLKIND_32BIT_PD:
2085 case PGMPOOLKIND_PAE_PDPT:
2086 switch (enmKind2)
2087 {
2088 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2089 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2090 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2091 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2092 case PGMPOOLKIND_64BIT_PML4:
2093 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2094 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2095 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2096 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2097 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2098 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2099 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2100 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2101 return true;
2102 default:
2103 return false;
2104 }
2105
2106 /*
2107 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2108 */
2109 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2110 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2111 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2112 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2113 case PGMPOOLKIND_64BIT_PML4:
2114 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2115 switch (enmKind2)
2116 {
2117 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2118 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2119 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2120 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2121 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2122 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2123 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2124 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2125 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2126 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2127 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2128 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2129 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2130 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2131 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2132 return true;
2133 default:
2134 return false;
2135 }
2136
2137 /*
2138 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2139 */
2140 case PGMPOOLKIND_ROOT_NESTED:
2141 return false;
2142
2143 default:
2144 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2145 }
2146}
2147
2148
2149/**
2150 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2151 *
2152 * @returns VBox status code.
2153 * @retval VINF_PGM_CACHED_PAGE on success.
2154 * @retval VERR_FILE_NOT_FOUND if not found.
2155 * @param pPool The pool.
2156 * @param GCPhys The GC physical address of the page we're gonna shadow.
2157 * @param enmKind The kind of mapping.
2158 * @param enmAccess Access type for the mapping (only relevant for big pages)
2159 * @param iUser The shadow page pool index of the user table.
2160 * @param iUserTable The index into the user table (shadowed).
2161 * @param ppPage Where to store the pointer to the page.
2162 */
2163static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2164{
2165#ifndef IN_RC
2166 const PVM pVM = pPool->CTX_SUFF(pVM);
2167#endif
2168 /*
2169 * Look up the GCPhys in the hash.
2170 */
2171 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2172 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2173 if (i != NIL_PGMPOOL_IDX)
2174 {
2175 do
2176 {
2177 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2178 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2179 if (pPage->GCPhys == GCPhys)
2180 {
2181 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2182 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2183 {
2184 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2185 * doesn't flush it in case there are no more free use records.
2186 */
2187 pgmPoolCacheUsed(pPool, pPage);
2188
2189 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2190 if (RT_SUCCESS(rc))
2191 {
2192 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2193 *ppPage = pPage;
2194 if (pPage->cModifications)
2195 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2196 STAM_COUNTER_INC(&pPool->StatCacheHits);
2197 return VINF_PGM_CACHED_PAGE;
2198 }
2199 return rc;
2200 }
2201
2202 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2203 {
2204 /*
2205 * The kind is different. In some cases we should now flush the page
2206 * as it has been reused, but in most cases this is normal remapping
2207 * of PDs as PT or big pages using the GCPhys field in a slightly
2208 * different way than the other kinds.
2209 */
2210 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2211 {
2212 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2213 pgmPoolFlushPage(pPool, pPage);
2214 break;
2215 }
2216 }
2217 }
2218
2219 /* next */
2220 i = pPage->iNext;
2221 } while (i != NIL_PGMPOOL_IDX);
2222 }
2223
2224 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2225 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2226 return VERR_FILE_NOT_FOUND;
2227}
2228
2229
2230/**
2231 * Inserts a page into the cache.
2232 *
2233 * @param pPool The pool.
2234 * @param pPage The cached page.
2235 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2236 */
2237static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2238{
2239 /*
2240 * Insert into the GCPhys hash if the page is fit for that.
2241 */
2242 Assert(!pPage->fCached);
2243 if (fCanBeCached)
2244 {
2245 pPage->fCached = true;
2246 pgmPoolHashInsert(pPool, pPage);
2247 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2248 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2249 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2250 }
2251 else
2252 {
2253 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2254 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2255 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2256 }
2257
2258 /*
2259 * Insert at the head of the age list.
2260 */
2261 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2262 pPage->iAgeNext = pPool->iAgeHead;
2263 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2264 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2265 else
2266 pPool->iAgeTail = pPage->idx;
2267 pPool->iAgeHead = pPage->idx;
2268}
2269
2270
2271/**
2272 * Flushes a cached page.
2273 *
2274 * @param pPool The pool.
2275 * @param pPage The cached page.
2276 */
2277static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2278{
2279 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2280
2281 /*
2282 * Remove the page from the hash.
2283 */
2284 if (pPage->fCached)
2285 {
2286 pPage->fCached = false;
2287 pgmPoolHashRemove(pPool, pPage);
2288 }
2289 else
2290 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2291
2292 /*
2293 * Remove it from the age list.
2294 */
2295 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2296 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2297 else
2298 pPool->iAgeTail = pPage->iAgePrev;
2299 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2300 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2301 else
2302 pPool->iAgeHead = pPage->iAgeNext;
2303 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2304 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2305}
2306
2307
2308/**
2309 * Looks for pages sharing the monitor.
2310 *
2311 * @returns Pointer to the head page.
2312 * @returns NULL if not found.
2313 * @param pPool The Pool
2314 * @param pNewPage The page which is going to be monitored.
2315 */
2316static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2317{
2318 /*
2319 * Look up the GCPhys in the hash.
2320 */
2321 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2322 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2323 if (i == NIL_PGMPOOL_IDX)
2324 return NULL;
2325 do
2326 {
2327 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2328 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2329 && pPage != pNewPage)
2330 {
2331 switch (pPage->enmKind)
2332 {
2333 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2334 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2335 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2336 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2337 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2338 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2339 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2340 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2341 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2342 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2343 case PGMPOOLKIND_64BIT_PML4:
2344 case PGMPOOLKIND_32BIT_PD:
2345 case PGMPOOLKIND_PAE_PDPT:
2346 {
2347 /* find the head */
2348 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2349 {
2350 Assert(pPage->iMonitoredPrev != pPage->idx);
2351 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2352 }
2353 return pPage;
2354 }
2355
2356 /* ignore, no monitoring. */
2357 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2358 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2359 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2360 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2361 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2362 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2363 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2364 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2365 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2366 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2367 case PGMPOOLKIND_ROOT_NESTED:
2368 case PGMPOOLKIND_PAE_PD_PHYS:
2369 case PGMPOOLKIND_PAE_PDPT_PHYS:
2370 case PGMPOOLKIND_32BIT_PD_PHYS:
2371 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2372 break;
2373 default:
2374 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2375 }
2376 }
2377
2378 /* next */
2379 i = pPage->iNext;
2380 } while (i != NIL_PGMPOOL_IDX);
2381 return NULL;
2382}
2383
2384
2385/**
2386 * Enabled write monitoring of a guest page.
2387 *
2388 * @returns VBox status code.
2389 * @retval VINF_SUCCESS on success.
2390 * @param pPool The pool.
2391 * @param pPage The cached page.
2392 */
2393static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2394{
2395 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2396
2397 /*
2398 * Filter out the relevant kinds.
2399 */
2400 switch (pPage->enmKind)
2401 {
2402 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2403 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2404 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2405 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2406 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2407 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2408 case PGMPOOLKIND_64BIT_PML4:
2409 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2410 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2411 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2412 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2413 case PGMPOOLKIND_32BIT_PD:
2414 case PGMPOOLKIND_PAE_PDPT:
2415 break;
2416
2417 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2418 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2419 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2420 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2421 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2422 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2423 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2424 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2425 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2426 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2427 case PGMPOOLKIND_ROOT_NESTED:
2428 /* Nothing to monitor here. */
2429 return VINF_SUCCESS;
2430
2431 case PGMPOOLKIND_32BIT_PD_PHYS:
2432 case PGMPOOLKIND_PAE_PDPT_PHYS:
2433 case PGMPOOLKIND_PAE_PD_PHYS:
2434 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2435 /* Nothing to monitor here. */
2436 return VINF_SUCCESS;
2437 default:
2438 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2439 }
2440
2441 /*
2442 * Install handler.
2443 */
2444 int rc;
2445 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2446 if (pPageHead)
2447 {
2448 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2449 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2450
2451#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2452 if (pPageHead->fDirty)
2453 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2454#endif
2455
2456 pPage->iMonitoredPrev = pPageHead->idx;
2457 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2458 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2459 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2460 pPageHead->iMonitoredNext = pPage->idx;
2461 rc = VINF_SUCCESS;
2462 }
2463 else
2464 {
2465 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2466 PVM pVM = pPool->CTX_SUFF(pVM);
2467 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2468 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2469 GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK,
2470 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2471 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2472 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2473 pPool->pszAccessHandler);
2474 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2475 * the heap size should suffice. */
2476 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2477 PVMCPU pVCpu = VMMGetCpu(pVM);
2478 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2479 }
2480 pPage->fMonitored = true;
2481 return rc;
2482}
2483
2484
2485/**
2486 * Disables write monitoring of a guest page.
2487 *
2488 * @returns VBox status code.
2489 * @retval VINF_SUCCESS on success.
2490 * @param pPool The pool.
2491 * @param pPage The cached page.
2492 */
2493static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2494{
2495 /*
2496 * Filter out the relevant kinds.
2497 */
2498 switch (pPage->enmKind)
2499 {
2500 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2501 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2502 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2503 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2504 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2505 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2506 case PGMPOOLKIND_64BIT_PML4:
2507 case PGMPOOLKIND_32BIT_PD:
2508 case PGMPOOLKIND_PAE_PDPT:
2509 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2510 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2511 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2512 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2513 break;
2514
2515 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2516 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2517 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2518 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2519 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2520 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2521 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2522 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2523 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2524 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2525 case PGMPOOLKIND_ROOT_NESTED:
2526 case PGMPOOLKIND_PAE_PD_PHYS:
2527 case PGMPOOLKIND_PAE_PDPT_PHYS:
2528 case PGMPOOLKIND_32BIT_PD_PHYS:
2529 /* Nothing to monitor here. */
2530 Assert(!pPage->fMonitored);
2531 return VINF_SUCCESS;
2532
2533 default:
2534 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2535 }
2536 Assert(pPage->fMonitored);
2537
2538 /*
2539 * Remove the page from the monitored list or uninstall it if last.
2540 */
2541 const PVM pVM = pPool->CTX_SUFF(pVM);
2542 int rc;
2543 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2544 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2545 {
2546 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2547 {
2548 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2549 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2550 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2551 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2552 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2553 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2554 pPool->pszAccessHandler);
2555 AssertFatalRCSuccess(rc);
2556 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2557 }
2558 else
2559 {
2560 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2561 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2562 {
2563 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2564 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2565 }
2566 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2567 rc = VINF_SUCCESS;
2568 }
2569 }
2570 else
2571 {
2572 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2573 AssertFatalRC(rc);
2574 PVMCPU pVCpu = VMMGetCpu(pVM);
2575 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2576 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2577 }
2578 pPage->fMonitored = false;
2579
2580 /*
2581 * Remove it from the list of modified pages (if in it).
2582 */
2583 pgmPoolMonitorModifiedRemove(pPool, pPage);
2584
2585 return rc;
2586}
2587
2588
2589/**
2590 * Inserts the page into the list of modified pages.
2591 *
2592 * @param pPool The pool.
2593 * @param pPage The page.
2594 */
2595void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2596{
2597 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2598 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2599 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2600 && pPool->iModifiedHead != pPage->idx,
2601 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2602 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2603 pPool->iModifiedHead, pPool->cModifiedPages));
2604
2605 pPage->iModifiedNext = pPool->iModifiedHead;
2606 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2607 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2608 pPool->iModifiedHead = pPage->idx;
2609 pPool->cModifiedPages++;
2610#ifdef VBOX_WITH_STATISTICS
2611 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2612 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2613#endif
2614}
2615
2616
2617/**
2618 * Removes the page from the list of modified pages and resets the
2619 * modification counter.
2620 *
2621 * @param pPool The pool.
2622 * @param pPage The page which is believed to be in the list of modified pages.
2623 */
2624static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2625{
2626 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2627 if (pPool->iModifiedHead == pPage->idx)
2628 {
2629 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2630 pPool->iModifiedHead = pPage->iModifiedNext;
2631 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2632 {
2633 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2634 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2635 }
2636 pPool->cModifiedPages--;
2637 }
2638 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2639 {
2640 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2641 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2642 {
2643 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2644 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2645 }
2646 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2647 pPool->cModifiedPages--;
2648 }
2649 else
2650 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2651 pPage->cModifications = 0;
2652}
2653
2654
2655/**
2656 * Zaps the list of modified pages, resetting their modification counters in the process.
2657 *
2658 * @param pVM The VM handle.
2659 */
2660static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2661{
2662 pgmLock(pVM);
2663 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2664 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2665
2666 unsigned cPages = 0; NOREF(cPages);
2667
2668#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2669 pgmPoolResetDirtyPages(pVM);
2670#endif
2671
2672 uint16_t idx = pPool->iModifiedHead;
2673 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2674 while (idx != NIL_PGMPOOL_IDX)
2675 {
2676 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2677 idx = pPage->iModifiedNext;
2678 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2679 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2680 pPage->cModifications = 0;
2681 Assert(++cPages);
2682 }
2683 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2684 pPool->cModifiedPages = 0;
2685 pgmUnlock(pVM);
2686}
2687
2688
2689/**
2690 * Handle SyncCR3 pool tasks
2691 *
2692 * @returns VBox status code.
2693 * @retval VINF_SUCCESS if successfully added.
2694 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
2695 * @param pVCpu The VMCPU handle.
2696 * @remark Should only be used when monitoring is available, thus placed in
2697 * the PGMPOOL_WITH_MONITORING #ifdef.
2698 */
2699int pgmPoolSyncCR3(PVMCPU pVCpu)
2700{
2701 PVM pVM = pVCpu->CTX_SUFF(pVM);
2702 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2703
2704 /*
2705 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2706 * Occasionally we will have to clear all the shadow page tables because we wanted
2707 * to monitor a page which was mapped by too many shadowed page tables. This operation
2708 * sometimes referred to as a 'lightweight flush'.
2709 */
2710# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2711 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2712 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2713# else /* !IN_RING3 */
2714 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2715 {
2716 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2717 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2718
2719 /* Make sure all other VCPUs return to ring 3. */
2720 if (pVM->cCpus > 1)
2721 {
2722 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2723 PGM_INVL_ALL_VCPU_TLBS(pVM);
2724 }
2725 return VINF_PGM_SYNC_CR3;
2726 }
2727# endif /* !IN_RING3 */
2728 else
2729 {
2730 pgmPoolMonitorModifiedClearAll(pVM);
2731
2732 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2733 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2734 {
2735 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2736 return pgmPoolSyncCR3(pVCpu);
2737 }
2738 }
2739 return VINF_SUCCESS;
2740}
2741
2742
2743/**
2744 * Frees up at least one user entry.
2745 *
2746 * @returns VBox status code.
2747 * @retval VINF_SUCCESS if successfully added.
2748 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2749 * @param pPool The pool.
2750 * @param iUser The user index.
2751 */
2752static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2753{
2754 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2755 /*
2756 * Just free cached pages in a braindead fashion.
2757 */
2758 /** @todo walk the age list backwards and free the first with usage. */
2759 int rc = VINF_SUCCESS;
2760 do
2761 {
2762 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2763 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2764 rc = rc2;
2765 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2766 return rc;
2767}
2768
2769
2770/**
2771 * Inserts a page into the cache.
2772 *
2773 * This will create user node for the page, insert it into the GCPhys
2774 * hash, and insert it into the age list.
2775 *
2776 * @returns VBox status code.
2777 * @retval VINF_SUCCESS if successfully added.
2778 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2779 * @param pPool The pool.
2780 * @param pPage The cached page.
2781 * @param GCPhys The GC physical address of the page we're gonna shadow.
2782 * @param iUser The user index.
2783 * @param iUserTable The user table index.
2784 */
2785DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2786{
2787 int rc = VINF_SUCCESS;
2788 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2789
2790 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2791
2792#ifdef VBOX_STRICT
2793 /*
2794 * Check that the entry doesn't already exists.
2795 */
2796 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2797 {
2798 uint16_t i = pPage->iUserHead;
2799 do
2800 {
2801 Assert(i < pPool->cMaxUsers);
2802 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2803 i = paUsers[i].iNext;
2804 } while (i != NIL_PGMPOOL_USER_INDEX);
2805 }
2806#endif
2807
2808 /*
2809 * Find free a user node.
2810 */
2811 uint16_t i = pPool->iUserFreeHead;
2812 if (i == NIL_PGMPOOL_USER_INDEX)
2813 {
2814 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2815 if (RT_FAILURE(rc))
2816 return rc;
2817 i = pPool->iUserFreeHead;
2818 }
2819
2820 /*
2821 * Unlink the user node from the free list,
2822 * initialize and insert it into the user list.
2823 */
2824 pPool->iUserFreeHead = paUsers[i].iNext;
2825 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2826 paUsers[i].iUser = iUser;
2827 paUsers[i].iUserTable = iUserTable;
2828 pPage->iUserHead = i;
2829
2830 /*
2831 * Insert into cache and enable monitoring of the guest page if enabled.
2832 *
2833 * Until we implement caching of all levels, including the CR3 one, we'll
2834 * have to make sure we don't try monitor & cache any recursive reuse of
2835 * a monitored CR3 page. Because all windows versions are doing this we'll
2836 * have to be able to do combined access monitoring, CR3 + PT and
2837 * PD + PT (guest PAE).
2838 *
2839 * Update:
2840 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2841 */
2842 const bool fCanBeMonitored = true;
2843 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2844 if (fCanBeMonitored)
2845 {
2846 rc = pgmPoolMonitorInsert(pPool, pPage);
2847 AssertRC(rc);
2848 }
2849 return rc;
2850}
2851
2852
2853/**
2854 * Adds a user reference to a page.
2855 *
2856 * This will move the page to the head of the
2857 *
2858 * @returns VBox status code.
2859 * @retval VINF_SUCCESS if successfully added.
2860 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2861 * @param pPool The pool.
2862 * @param pPage The cached page.
2863 * @param iUser The user index.
2864 * @param iUserTable The user table.
2865 */
2866static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2867{
2868 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2869
2870 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2871
2872# ifdef VBOX_STRICT
2873 /*
2874 * Check that the entry doesn't already exists. We only allow multiple
2875 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2876 */
2877 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2878 {
2879 uint16_t i = pPage->iUserHead;
2880 do
2881 {
2882 Assert(i < pPool->cMaxUsers);
2883 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2884 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2885 i = paUsers[i].iNext;
2886 } while (i != NIL_PGMPOOL_USER_INDEX);
2887 }
2888# endif
2889
2890 /*
2891 * Allocate a user node.
2892 */
2893 uint16_t i = pPool->iUserFreeHead;
2894 if (i == NIL_PGMPOOL_USER_INDEX)
2895 {
2896 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2897 if (RT_FAILURE(rc))
2898 return rc;
2899 i = pPool->iUserFreeHead;
2900 }
2901 pPool->iUserFreeHead = paUsers[i].iNext;
2902
2903 /*
2904 * Initialize the user node and insert it.
2905 */
2906 paUsers[i].iNext = pPage->iUserHead;
2907 paUsers[i].iUser = iUser;
2908 paUsers[i].iUserTable = iUserTable;
2909 pPage->iUserHead = i;
2910
2911# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2912 if (pPage->fDirty)
2913 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2914# endif
2915
2916 /*
2917 * Tell the cache to update its replacement stats for this page.
2918 */
2919 pgmPoolCacheUsed(pPool, pPage);
2920 return VINF_SUCCESS;
2921}
2922
2923
2924/**
2925 * Frees a user record associated with a page.
2926 *
2927 * This does not clear the entry in the user table, it simply replaces the
2928 * user record to the chain of free records.
2929 *
2930 * @param pPool The pool.
2931 * @param HCPhys The HC physical address of the shadow page.
2932 * @param iUser The shadow page pool index of the user table.
2933 * @param iUserTable The index into the user table (shadowed).
2934 */
2935static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2936{
2937 /*
2938 * Unlink and free the specified user entry.
2939 */
2940 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2941
2942 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2943 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2944 uint16_t i = pPage->iUserHead;
2945 if ( i != NIL_PGMPOOL_USER_INDEX
2946 && paUsers[i].iUser == iUser
2947 && paUsers[i].iUserTable == iUserTable)
2948 {
2949 pPage->iUserHead = paUsers[i].iNext;
2950
2951 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2952 paUsers[i].iNext = pPool->iUserFreeHead;
2953 pPool->iUserFreeHead = i;
2954 return;
2955 }
2956
2957 /* General: Linear search. */
2958 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2959 while (i != NIL_PGMPOOL_USER_INDEX)
2960 {
2961 if ( paUsers[i].iUser == iUser
2962 && paUsers[i].iUserTable == iUserTable)
2963 {
2964 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2965 paUsers[iPrev].iNext = paUsers[i].iNext;
2966 else
2967 pPage->iUserHead = paUsers[i].iNext;
2968
2969 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2970 paUsers[i].iNext = pPool->iUserFreeHead;
2971 pPool->iUserFreeHead = i;
2972 return;
2973 }
2974 iPrev = i;
2975 i = paUsers[i].iNext;
2976 }
2977
2978 /* Fatal: didn't find it */
2979 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
2980 iUser, iUserTable, pPage->GCPhys));
2981}
2982
2983
2984/**
2985 * Gets the entry size of a shadow table.
2986 *
2987 * @param enmKind The kind of page.
2988 *
2989 * @returns The size of the entry in bytes. That is, 4 or 8.
2990 * @returns If the kind is not for a table, an assertion is raised and 0 is
2991 * returned.
2992 */
2993DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2994{
2995 switch (enmKind)
2996 {
2997 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2998 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2999 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3000 case PGMPOOLKIND_32BIT_PD:
3001 case PGMPOOLKIND_32BIT_PD_PHYS:
3002 return 4;
3003
3004 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3005 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3006 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3007 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3008 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3009 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3010 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3011 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3012 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3013 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3014 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3015 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3016 case PGMPOOLKIND_64BIT_PML4:
3017 case PGMPOOLKIND_PAE_PDPT:
3018 case PGMPOOLKIND_ROOT_NESTED:
3019 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3020 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3021 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3022 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3023 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3024 case PGMPOOLKIND_PAE_PD_PHYS:
3025 case PGMPOOLKIND_PAE_PDPT_PHYS:
3026 return 8;
3027
3028 default:
3029 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3030 }
3031}
3032
3033
3034/**
3035 * Gets the entry size of a guest table.
3036 *
3037 * @param enmKind The kind of page.
3038 *
3039 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3040 * @returns If the kind is not for a table, an assertion is raised and 0 is
3041 * returned.
3042 */
3043DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3044{
3045 switch (enmKind)
3046 {
3047 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3048 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3049 case PGMPOOLKIND_32BIT_PD:
3050 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3051 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3052 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3053 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3054 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3055 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3056 return 4;
3057
3058 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3059 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3060 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3061 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3062 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3063 case PGMPOOLKIND_64BIT_PML4:
3064 case PGMPOOLKIND_PAE_PDPT:
3065 return 8;
3066
3067 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3068 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3069 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3070 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3071 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3072 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3073 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3074 case PGMPOOLKIND_ROOT_NESTED:
3075 case PGMPOOLKIND_PAE_PD_PHYS:
3076 case PGMPOOLKIND_PAE_PDPT_PHYS:
3077 case PGMPOOLKIND_32BIT_PD_PHYS:
3078 /** @todo can we return 0? (nobody is calling this...) */
3079 AssertFailed();
3080 return 0;
3081
3082 default:
3083 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3084 }
3085}
3086
3087
3088/**
3089 * Checks one shadow page table entry for a mapping of a physical page.
3090 *
3091 * @returns true / false indicating removal of all relevant PTEs
3092 *
3093 * @param pVM The VM handle.
3094 * @param pPhysPage The guest page in question.
3095 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3096 * @param iShw The shadow page table.
3097 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3098 */
3099static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3100{
3101 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3102 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3103 bool fRet = false;
3104
3105 /*
3106 * Assert sanity.
3107 */
3108 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3109 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3110 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3111
3112 /*
3113 * Then, clear the actual mappings to the page in the shadow PT.
3114 */
3115 switch (pPage->enmKind)
3116 {
3117 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3118 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3119 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3120 {
3121 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3122 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3123 uint32_t u32AndMask = 0;
3124 uint32_t u32OrMask = 0;
3125
3126 if (!fFlushPTEs)
3127 {
3128 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3129 {
3130 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3131 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3132 u32OrMask = X86_PTE_RW;
3133 u32AndMask = UINT32_MAX;
3134 fRet = true;
3135 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3136 break;
3137
3138 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3139 u32OrMask = 0;
3140 u32AndMask = ~X86_PTE_RW;
3141 fRet = true;
3142 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3143 break;
3144 default:
3145 /* (shouldn't be here, will assert below) */
3146 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3147 break;
3148 }
3149 }
3150 else
3151 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3152
3153 /* Update the counter if we're removing references. */
3154 if (!u32AndMask)
3155 {
3156 Assert(pPage->cPresent );
3157 Assert(pPool->cPresent);
3158 pPage->cPresent--;
3159 pPool->cPresent--;
3160 }
3161
3162 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3163 {
3164 X86PTE Pte;
3165
3166 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3167 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3168 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3169 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3170
3171 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3172 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3173 return fRet;
3174 }
3175#ifdef LOG_ENABLED
3176 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3177 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3178 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3179 {
3180 Log(("i=%d cFound=%d\n", i, ++cFound));
3181 }
3182#endif
3183 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3184 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3185 break;
3186 }
3187
3188 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3189 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3190 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3191 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3192 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3193 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3194 {
3195 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3196 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3197 uint64_t u64OrMask = 0;
3198 uint64_t u64AndMask = 0;
3199
3200 if (!fFlushPTEs)
3201 {
3202 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3203 {
3204 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3205 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3206 u64OrMask = X86_PTE_RW;
3207 u64AndMask = UINT64_MAX;
3208 fRet = true;
3209 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3210 break;
3211
3212 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3213 u64OrMask = 0;
3214 u64AndMask = ~(uint64_t)X86_PTE_RW;
3215 fRet = true;
3216 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3217 break;
3218
3219 default:
3220 /* (shouldn't be here, will assert below) */
3221 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3222 break;
3223 }
3224 }
3225 else
3226 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3227
3228 /* Update the counter if we're removing references. */
3229 if (!u64AndMask)
3230 {
3231 Assert(pPage->cPresent);
3232 Assert(pPool->cPresent);
3233 pPage->cPresent--;
3234 pPool->cPresent--;
3235 }
3236
3237 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3238 {
3239 X86PTEPAE Pte;
3240
3241 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3242 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3243 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3244 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3245
3246 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3247 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3248 return fRet;
3249 }
3250#ifdef LOG_ENABLED
3251 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3252 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3253 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3254 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3255 Log(("i=%d cFound=%d\n", i, ++cFound));
3256#endif
3257 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3258 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3259 break;
3260 }
3261
3262#ifdef PGM_WITH_LARGE_PAGES
3263 /* Large page case only. */
3264 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3265 {
3266 Assert(pVM->pgm.s.fNestedPaging);
3267
3268 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3269 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3270
3271 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3272 {
3273 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3274 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3275 pPD->a[iPte].u = 0;
3276 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3277
3278 /* Update the counter as we're removing references. */
3279 Assert(pPage->cPresent);
3280 Assert(pPool->cPresent);
3281 pPage->cPresent--;
3282 pPool->cPresent--;
3283
3284 return fRet;
3285 }
3286# ifdef LOG_ENABLED
3287 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3288 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3289 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3290 Log(("i=%d cFound=%d\n", i, ++cFound));
3291# endif
3292 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3293 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3294 break;
3295 }
3296
3297 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3298 case PGMPOOLKIND_PAE_PD_PHYS:
3299 {
3300 Assert(pVM->pgm.s.fNestedPaging);
3301
3302 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3303 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3304
3305 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3306 {
3307 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3308 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3309 pPD->a[iPte].u = 0;
3310 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3311
3312 /* Update the counter as we're removing references. */
3313 Assert(pPage->cPresent);
3314 Assert(pPool->cPresent);
3315 pPage->cPresent--;
3316 pPool->cPresent--;
3317 return fRet;
3318 }
3319# ifdef LOG_ENABLED
3320 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3321 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3322 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3323 Log(("i=%d cFound=%d\n", i, ++cFound));
3324# endif
3325 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3326 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3327 break;
3328 }
3329#endif /* PGM_WITH_LARGE_PAGES */
3330
3331 default:
3332 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3333 }
3334 return fRet;
3335}
3336
3337
3338/**
3339 * Scans one shadow page table for mappings of a physical page.
3340 *
3341 * @param pVM The VM handle.
3342 * @param pPhysPage The guest page in question.
3343 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3344 * @param iShw The shadow page table.
3345 */
3346static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3347{
3348 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3349
3350 /* We should only come here with when there's only one reference to this physical page. */
3351 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3352
3353 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3354 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3355 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3356 if (!fKeptPTEs)
3357 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3358 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3359}
3360
3361
3362/**
3363 * Flushes a list of shadow page tables mapping the same physical page.
3364 *
3365 * @param pVM The VM handle.
3366 * @param pPhysPage The guest page in question.
3367 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3368 * @param iPhysExt The physical cross reference extent list to flush.
3369 */
3370static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3371{
3372 PGM_LOCK_ASSERT_OWNER(pVM);
3373 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3374 bool fKeepList = false;
3375
3376 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3377 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3378
3379 const uint16_t iPhysExtStart = iPhysExt;
3380 PPGMPOOLPHYSEXT pPhysExt;
3381 do
3382 {
3383 Assert(iPhysExt < pPool->cMaxPhysExts);
3384 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3385 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3386 {
3387 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3388 {
3389 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3390 if (!fKeptPTEs)
3391 {
3392 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3393 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3394 }
3395 else
3396 fKeepList = true;
3397 }
3398 }
3399 /* next */
3400 iPhysExt = pPhysExt->iNext;
3401 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3402
3403 if (!fKeepList)
3404 {
3405 /* insert the list into the free list and clear the ram range entry. */
3406 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3407 pPool->iPhysExtFreeHead = iPhysExtStart;
3408 /* Invalidate the tracking data. */
3409 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3410 }
3411
3412 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3413}
3414
3415
3416/**
3417 * Flushes all shadow page table mappings of the given guest page.
3418 *
3419 * This is typically called when the host page backing the guest one has been
3420 * replaced or when the page protection was changed due to a guest access
3421 * caught by the monitoring.
3422 *
3423 * @returns VBox status code.
3424 * @retval VINF_SUCCESS if all references has been successfully cleared.
3425 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3426 * pool cleaning. FF and sync flags are set.
3427 *
3428 * @param pVM The VM handle.
3429 * @param GCPhysPage GC physical address of the page in question
3430 * @param pPhysPage The guest page in question.
3431 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3432 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3433 * flushed, it is NOT touched if this isn't necessary.
3434 * The caller MUST initialized this to @a false.
3435 */
3436int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3437{
3438 PVMCPU pVCpu = VMMGetCpu(pVM);
3439 pgmLock(pVM);
3440 int rc = VINF_SUCCESS;
3441
3442#ifdef PGM_WITH_LARGE_PAGES
3443 /* Is this page part of a large page? */
3444 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3445 {
3446 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3447 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3448
3449 /* Fetch the large page base. */
3450 PPGMPAGE pLargePage;
3451 if (GCPhysBase != GCPhysPage)
3452 {
3453 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3454 AssertFatal(pLargePage);
3455 }
3456 else
3457 pLargePage = pPhysPage;
3458
3459 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3460
3461 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3462 {
3463 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3464 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3465 pVM->pgm.s.cLargePagesDisabled++;
3466
3467 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3468 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3469
3470 *pfFlushTLBs = true;
3471 pgmUnlock(pVM);
3472 return rc;
3473 }
3474 }
3475#else
3476 NOREF(GCPhysPage);
3477#endif /* PGM_WITH_LARGE_PAGES */
3478
3479 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3480 if (u16)
3481 {
3482 /*
3483 * The zero page is currently screwing up the tracking and we'll
3484 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3485 * is defined, zero pages won't normally be mapped. Some kind of solution
3486 * will be needed for this problem of course, but it will have to wait...
3487 */
3488 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3489 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3490 rc = VINF_PGM_GCPHYS_ALIASED;
3491 else
3492 {
3493# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3494 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3495 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3496 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3497# endif
3498
3499 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3500 {
3501 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3502 pgmPoolTrackFlushGCPhysPT(pVM,
3503 pPhysPage,
3504 fFlushPTEs,
3505 PGMPOOL_TD_GET_IDX(u16));
3506 }
3507 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3508 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3509 else
3510 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3511 *pfFlushTLBs = true;
3512
3513# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3514 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3515# endif
3516 }
3517 }
3518
3519 if (rc == VINF_PGM_GCPHYS_ALIASED)
3520 {
3521 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3522 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3523 rc = VINF_PGM_SYNC_CR3;
3524 }
3525 pgmUnlock(pVM);
3526 return rc;
3527}
3528
3529
3530/**
3531 * Scans all shadow page tables for mappings of a physical page.
3532 *
3533 * This may be slow, but it's most likely more efficient than cleaning
3534 * out the entire page pool / cache.
3535 *
3536 * @returns VBox status code.
3537 * @retval VINF_SUCCESS if all references has been successfully cleared.
3538 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3539 * a page pool cleaning.
3540 *
3541 * @param pVM The VM handle.
3542 * @param pPhysPage The guest page in question.
3543 */
3544int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3545{
3546 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3547 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3548 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3549 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3550
3551 /*
3552 * There is a limit to what makes sense.
3553 */
3554 if ( pPool->cPresent > 1024
3555 && pVM->cCpus == 1)
3556 {
3557 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3558 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3559 return VINF_PGM_GCPHYS_ALIASED;
3560 }
3561
3562 /*
3563 * Iterate all the pages until we've encountered all that in use.
3564 * This is simple but not quite optimal solution.
3565 */
3566 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3567 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3568 unsigned cLeft = pPool->cUsedPages;
3569 unsigned iPage = pPool->cCurPages;
3570 while (--iPage >= PGMPOOL_IDX_FIRST)
3571 {
3572 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3573 if ( pPage->GCPhys != NIL_RTGCPHYS
3574 && pPage->cPresent)
3575 {
3576 switch (pPage->enmKind)
3577 {
3578 /*
3579 * We only care about shadow page tables.
3580 */
3581 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3582 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3583 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3584 {
3585 unsigned cPresent = pPage->cPresent;
3586 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3587 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3588 if (pPT->a[i].n.u1Present)
3589 {
3590 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3591 {
3592 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3593 pPT->a[i].u = 0;
3594
3595 /* Update the counter as we're removing references. */
3596 Assert(pPage->cPresent);
3597 Assert(pPool->cPresent);
3598 pPage->cPresent--;
3599 pPool->cPresent--;
3600 }
3601 if (!--cPresent)
3602 break;
3603 }
3604 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3605 break;
3606 }
3607
3608 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3609 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3610 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3611 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3612 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3613 {
3614 unsigned cPresent = pPage->cPresent;
3615 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3616 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3617 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3618 {
3619 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3620 {
3621 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3622 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3623
3624 /* Update the counter as we're removing references. */
3625 Assert(pPage->cPresent);
3626 Assert(pPool->cPresent);
3627 pPage->cPresent--;
3628 pPool->cPresent--;
3629 }
3630 if (!--cPresent)
3631 break;
3632 }
3633 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3634 break;
3635 }
3636#ifndef IN_RC
3637 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3638 {
3639 unsigned cPresent = pPage->cPresent;
3640 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3641 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3642 if (pPT->a[i].n.u1Present)
3643 {
3644 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3645 {
3646 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3647 pPT->a[i].u = 0;
3648
3649 /* Update the counter as we're removing references. */
3650 Assert(pPage->cPresent);
3651 Assert(pPool->cPresent);
3652 pPage->cPresent--;
3653 pPool->cPresent--;
3654 }
3655 if (!--cPresent)
3656 break;
3657 }
3658 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3659 break;
3660 }
3661#endif
3662 }
3663 if (!--cLeft)
3664 break;
3665 }
3666 }
3667
3668 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3669 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3670
3671 /*
3672 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3673 */
3674 if (pPool->cPresent > 1024)
3675 {
3676 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3677 return VINF_PGM_GCPHYS_ALIASED;
3678 }
3679
3680 return VINF_SUCCESS;
3681}
3682
3683
3684/**
3685 * Clears the user entry in a user table.
3686 *
3687 * This is used to remove all references to a page when flushing it.
3688 */
3689static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3690{
3691 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3692 Assert(pUser->iUser < pPool->cCurPages);
3693 uint32_t iUserTable = pUser->iUserTable;
3694
3695 /*
3696 * Map the user page.
3697 */
3698 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3699 union
3700 {
3701 uint64_t *pau64;
3702 uint32_t *pau32;
3703 } u;
3704 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3705
3706 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3707
3708 /* Safety precaution in case we change the paging for other modes too in the future. */
3709 Assert(!pgmPoolIsPageLocked(pPage));
3710
3711#ifdef VBOX_STRICT
3712 /*
3713 * Some sanity checks.
3714 */
3715 switch (pUserPage->enmKind)
3716 {
3717 case PGMPOOLKIND_32BIT_PD:
3718 case PGMPOOLKIND_32BIT_PD_PHYS:
3719 Assert(iUserTable < X86_PG_ENTRIES);
3720 break;
3721 case PGMPOOLKIND_PAE_PDPT:
3722 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3723 case PGMPOOLKIND_PAE_PDPT_PHYS:
3724 Assert(iUserTable < 4);
3725 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3726 break;
3727 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3728 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3729 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3730 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3731 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3732 case PGMPOOLKIND_PAE_PD_PHYS:
3733 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3734 break;
3735 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3736 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3737 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3738 break;
3739 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3740 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3741 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3742 break;
3743 case PGMPOOLKIND_64BIT_PML4:
3744 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3745 /* GCPhys >> PAGE_SHIFT is the index here */
3746 break;
3747 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3748 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3749 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3750 break;
3751
3752 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3753 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3754 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3755 break;
3756
3757 case PGMPOOLKIND_ROOT_NESTED:
3758 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3759 break;
3760
3761 default:
3762 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3763 break;
3764 }
3765#endif /* VBOX_STRICT */
3766
3767 /*
3768 * Clear the entry in the user page.
3769 */
3770 switch (pUserPage->enmKind)
3771 {
3772 /* 32-bit entries */
3773 case PGMPOOLKIND_32BIT_PD:
3774 case PGMPOOLKIND_32BIT_PD_PHYS:
3775 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3776 break;
3777
3778 /* 64-bit entries */
3779 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3780 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3781 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3782 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3783 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3784#ifdef IN_RC
3785 /*
3786 * In 32 bits PAE mode we *must* invalidate the TLB when changing a
3787 * PDPT entry; the CPU fetches them only during cr3 load, so any
3788 * non-present PDPT will continue to cause page faults.
3789 */
3790 ASMReloadCR3();
3791 /* no break */
3792#endif
3793 case PGMPOOLKIND_PAE_PD_PHYS:
3794 case PGMPOOLKIND_PAE_PDPT_PHYS:
3795 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3796 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3797 case PGMPOOLKIND_64BIT_PML4:
3798 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3799 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3800 case PGMPOOLKIND_PAE_PDPT:
3801 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3802 case PGMPOOLKIND_ROOT_NESTED:
3803 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3804 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3805 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3806 break;
3807
3808 default:
3809 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3810 }
3811 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3812}
3813
3814
3815/**
3816 * Clears all users of a page.
3817 */
3818static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3819{
3820 /*
3821 * Free all the user records.
3822 */
3823 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3824
3825 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3826 uint16_t i = pPage->iUserHead;
3827 while (i != NIL_PGMPOOL_USER_INDEX)
3828 {
3829 /* Clear enter in user table. */
3830 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3831
3832 /* Free it. */
3833 const uint16_t iNext = paUsers[i].iNext;
3834 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3835 paUsers[i].iNext = pPool->iUserFreeHead;
3836 pPool->iUserFreeHead = i;
3837
3838 /* Next. */
3839 i = iNext;
3840 }
3841 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3842}
3843
3844
3845/**
3846 * Allocates a new physical cross reference extent.
3847 *
3848 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3849 * @param pVM The VM handle.
3850 * @param piPhysExt Where to store the phys ext index.
3851 */
3852PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3853{
3854 PGM_LOCK_ASSERT_OWNER(pVM);
3855 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3856 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3857 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3858 {
3859 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3860 return NULL;
3861 }
3862 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3863 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3864 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3865 *piPhysExt = iPhysExt;
3866 return pPhysExt;
3867}
3868
3869
3870/**
3871 * Frees a physical cross reference extent.
3872 *
3873 * @param pVM The VM handle.
3874 * @param iPhysExt The extent to free.
3875 */
3876void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3877{
3878 PGM_LOCK_ASSERT_OWNER(pVM);
3879 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3880 Assert(iPhysExt < pPool->cMaxPhysExts);
3881 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3882 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3883 {
3884 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3885 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3886 }
3887 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3888 pPool->iPhysExtFreeHead = iPhysExt;
3889}
3890
3891
3892/**
3893 * Frees a physical cross reference extent.
3894 *
3895 * @param pVM The VM handle.
3896 * @param iPhysExt The extent to free.
3897 */
3898void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3899{
3900 PGM_LOCK_ASSERT_OWNER(pVM);
3901 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3902
3903 const uint16_t iPhysExtStart = iPhysExt;
3904 PPGMPOOLPHYSEXT pPhysExt;
3905 do
3906 {
3907 Assert(iPhysExt < pPool->cMaxPhysExts);
3908 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3909 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3910 {
3911 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3912 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3913 }
3914
3915 /* next */
3916 iPhysExt = pPhysExt->iNext;
3917 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3918
3919 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3920 pPool->iPhysExtFreeHead = iPhysExtStart;
3921}
3922
3923
3924/**
3925 * Insert a reference into a list of physical cross reference extents.
3926 *
3927 * @returns The new tracking data for PGMPAGE.
3928 *
3929 * @param pVM The VM handle.
3930 * @param iPhysExt The physical extent index of the list head.
3931 * @param iShwPT The shadow page table index.
3932 * @param iPte Page table entry
3933 *
3934 */
3935static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3936{
3937 PGM_LOCK_ASSERT_OWNER(pVM);
3938 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3939 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3940
3941 /*
3942 * Special common cases.
3943 */
3944 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3945 {
3946 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3947 paPhysExts[iPhysExt].apte[1] = iPte;
3948 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3949 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3950 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3951 }
3952 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3953 {
3954 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3955 paPhysExts[iPhysExt].apte[2] = iPte;
3956 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3957 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3958 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3959 }
3960 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3961
3962 /*
3963 * General treatment.
3964 */
3965 const uint16_t iPhysExtStart = iPhysExt;
3966 unsigned cMax = 15;
3967 for (;;)
3968 {
3969 Assert(iPhysExt < pPool->cMaxPhysExts);
3970 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3971 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3972 {
3973 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3974 paPhysExts[iPhysExt].apte[i] = iPte;
3975 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3976 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3977 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3978 }
3979 if (!--cMax)
3980 {
3981 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
3982 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3983 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3984 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3985 }
3986
3987 /* advance */
3988 iPhysExt = paPhysExts[iPhysExt].iNext;
3989 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3990 break;
3991 }
3992
3993 /*
3994 * Add another extent to the list.
3995 */
3996 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3997 if (!pNew)
3998 {
3999 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4000 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4001 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4002 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4003 }
4004 pNew->iNext = iPhysExtStart;
4005 pNew->aidx[0] = iShwPT;
4006 pNew->apte[0] = iPte;
4007 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4008 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4009}
4010
4011
4012/**
4013 * Add a reference to guest physical page where extents are in use.
4014 *
4015 * @returns The new tracking data for PGMPAGE.
4016 *
4017 * @param pVM The VM handle.
4018 * @param pPhysPage Pointer to the aPages entry in the ram range.
4019 * @param u16 The ram range flags (top 16-bits).
4020 * @param iShwPT The shadow page table index.
4021 * @param iPte Page table entry
4022 */
4023uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4024{
4025 pgmLock(pVM);
4026 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4027 {
4028 /*
4029 * Convert to extent list.
4030 */
4031 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4032 uint16_t iPhysExt;
4033 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4034 if (pPhysExt)
4035 {
4036 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4037 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4038 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4039 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4040 pPhysExt->aidx[1] = iShwPT;
4041 pPhysExt->apte[1] = iPte;
4042 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4043 }
4044 else
4045 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4046 }
4047 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4048 {
4049 /*
4050 * Insert into the extent list.
4051 */
4052 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4053 }
4054 else
4055 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4056 pgmUnlock(pVM);
4057 return u16;
4058}
4059
4060
4061/**
4062 * Clear references to guest physical memory.
4063 *
4064 * @param pPool The pool.
4065 * @param pPage The page.
4066 * @param pPhysPage Pointer to the aPages entry in the ram range.
4067 * @param iPte Shadow PTE index
4068 */
4069void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4070{
4071 PVM pVM = pPool->CTX_SUFF(pVM);
4072 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4073 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4074
4075 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4076 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4077 {
4078 pgmLock(pVM);
4079
4080 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4081 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4082 do
4083 {
4084 Assert(iPhysExt < pPool->cMaxPhysExts);
4085
4086 /*
4087 * Look for the shadow page and check if it's all freed.
4088 */
4089 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4090 {
4091 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4092 && paPhysExts[iPhysExt].apte[i] == iPte)
4093 {
4094 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4095 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4096
4097 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4098 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4099 {
4100 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4101 pgmUnlock(pVM);
4102 return;
4103 }
4104
4105 /* we can free the node. */
4106 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4107 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4108 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4109 {
4110 /* lonely node */
4111 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4112 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4113 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4114 }
4115 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4116 {
4117 /* head */
4118 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4119 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4120 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4121 }
4122 else
4123 {
4124 /* in list */
4125 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4126 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4127 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4128 }
4129 iPhysExt = iPhysExtNext;
4130 pgmUnlock(pVM);
4131 return;
4132 }
4133 }
4134
4135 /* next */
4136 iPhysExtPrev = iPhysExt;
4137 iPhysExt = paPhysExts[iPhysExt].iNext;
4138 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4139
4140 pgmUnlock(pVM);
4141 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4142 }
4143 else /* nothing to do */
4144 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4145}
4146
4147/**
4148 * Clear references to guest physical memory.
4149 *
4150 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4151 * physical address is assumed to be correct, so the linear search can be
4152 * skipped and we can assert at an earlier point.
4153 *
4154 * @param pPool The pool.
4155 * @param pPage The page.
4156 * @param HCPhys The host physical address corresponding to the guest page.
4157 * @param GCPhys The guest physical address corresponding to HCPhys.
4158 * @param iPte Shadow PTE index
4159 */
4160static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4161{
4162 /*
4163 * Lookup the page and check if it checks out before derefing it.
4164 */
4165 PVM pVM = pPool->CTX_SUFF(pVM);
4166 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4167 if (pPhysPage)
4168 {
4169 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4170#ifdef LOG_ENABLED
4171 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4172 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4173#endif
4174 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4175 {
4176 Assert(pPage->cPresent);
4177 Assert(pPool->cPresent);
4178 pPage->cPresent--;
4179 pPool->cPresent--;
4180 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4181 return;
4182 }
4183
4184 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4185 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4186 }
4187 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4188}
4189
4190
4191/**
4192 * Clear references to guest physical memory.
4193 *
4194 * @param pPool The pool.
4195 * @param pPage The page.
4196 * @param HCPhys The host physical address corresponding to the guest page.
4197 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
4198 * @param iPte Shadow pte index
4199 */
4200void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4201{
4202 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4203
4204 /*
4205 * Try the hint first.
4206 */
4207 RTHCPHYS HCPhysHinted;
4208 PVM pVM = pPool->CTX_SUFF(pVM);
4209 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4210 if (pPhysPage)
4211 {
4212 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4213 Assert(HCPhysHinted);
4214 if (HCPhysHinted == HCPhys)
4215 {
4216 Assert(pPage->cPresent);
4217 Assert(pPool->cPresent);
4218 pPage->cPresent--;
4219 pPool->cPresent--;
4220 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4221 return;
4222 }
4223 }
4224 else
4225 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4226
4227 /*
4228 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4229 */
4230 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4231 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4232 while (pRam)
4233 {
4234 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4235 while (iPage-- > 0)
4236 {
4237 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4238 {
4239 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4240 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4241 Assert(pPage->cPresent);
4242 Assert(pPool->cPresent);
4243 pPage->cPresent--;
4244 pPool->cPresent--;
4245 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4246 return;
4247 }
4248 }
4249 pRam = pRam->CTX_SUFF(pNext);
4250 }
4251
4252 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4253}
4254
4255
4256/**
4257 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4258 *
4259 * @param pPool The pool.
4260 * @param pPage The page.
4261 * @param pShwPT The shadow page table (mapping of the page).
4262 * @param pGstPT The guest page table.
4263 */
4264DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4265{
4266 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4267 {
4268 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4269 if (pShwPT->a[i].n.u1Present)
4270 {
4271 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4272 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4273 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4274 if (!pPage->cPresent)
4275 break;
4276 }
4277 }
4278}
4279
4280
4281/**
4282 * Clear references to guest physical memory in a PAE / 32-bit page table.
4283 *
4284 * @param pPool The pool.
4285 * @param pPage The page.
4286 * @param pShwPT The shadow page table (mapping of the page).
4287 * @param pGstPT The guest page table (just a half one).
4288 */
4289DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4290{
4291 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4292 {
4293 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4294 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4295 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4296 {
4297 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4298 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4299 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4300 if (!pPage->cPresent)
4301 break;
4302 }
4303 }
4304}
4305
4306
4307/**
4308 * Clear references to guest physical memory in a PAE / PAE page table.
4309 *
4310 * @param pPool The pool.
4311 * @param pPage The page.
4312 * @param pShwPT The shadow page table (mapping of the page).
4313 * @param pGstPT The guest page table.
4314 */
4315DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4316{
4317 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4318 {
4319 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4320 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4321 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4322 {
4323 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4324 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4325 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
4326 if (!pPage->cPresent)
4327 break;
4328 }
4329 }
4330}
4331
4332
4333/**
4334 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4335 *
4336 * @param pPool The pool.
4337 * @param pPage The page.
4338 * @param pShwPT The shadow page table (mapping of the page).
4339 */
4340DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4341{
4342 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4343 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4344 {
4345 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4346 if (pShwPT->a[i].n.u1Present)
4347 {
4348 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4349 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4350 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys, i);
4351 if (!pPage->cPresent)
4352 break;
4353 }
4354 }
4355}
4356
4357
4358/**
4359 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4360 *
4361 * @param pPool The pool.
4362 * @param pPage The page.
4363 * @param pShwPT The shadow page table (mapping of the page).
4364 */
4365DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4366{
4367 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4368 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4369 {
4370 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4371 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4372 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4373 {
4374 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4375 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4376 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys, i);
4377 if (!pPage->cPresent)
4378 break;
4379 }
4380 }
4381}
4382
4383
4384/**
4385 * Clear references to shadowed pages in an EPT page table.
4386 *
4387 * @param pPool The pool.
4388 * @param pPage The page.
4389 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4390 */
4391DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4392{
4393 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4394 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4395 {
4396 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4397 if (pShwPT->a[i].n.u1Present)
4398 {
4399 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4400 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4401 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys, i);
4402 if (!pPage->cPresent)
4403 break;
4404 }
4405 }
4406}
4407
4408
4409
4410/**
4411 * Clear references to shadowed pages in a 32 bits page directory.
4412 *
4413 * @param pPool The pool.
4414 * @param pPage The page.
4415 * @param pShwPD The shadow page directory (mapping of the page).
4416 */
4417DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4418{
4419 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4420 {
4421 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4422 if ( pShwPD->a[i].n.u1Present
4423 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4424 )
4425 {
4426 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4427 if (pSubPage)
4428 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4429 else
4430 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4431 }
4432 }
4433}
4434
4435/**
4436 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4437 *
4438 * @param pPool The pool.
4439 * @param pPage The page.
4440 * @param pShwPD The shadow page directory (mapping of the page).
4441 */
4442DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4443{
4444 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4445 {
4446 if ( pShwPD->a[i].n.u1Present
4447 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4448 {
4449#ifdef PGM_WITH_LARGE_PAGES
4450 if (pShwPD->a[i].b.u1Size)
4451 {
4452 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4453 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4454 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */, i);
4455 }
4456 else
4457#endif
4458 {
4459 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4460 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4461 if (pSubPage)
4462 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4463 else
4464 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4465 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4466 }
4467 }
4468 }
4469}
4470
4471/**
4472 * Clear references to shadowed pages in a PAE page directory pointer table.
4473 *
4474 * @param pPool The pool.
4475 * @param pPage The page.
4476 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4477 */
4478DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4479{
4480 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4481 {
4482 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4483 if ( pShwPDPT->a[i].n.u1Present
4484 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4485 )
4486 {
4487 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4488 if (pSubPage)
4489 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4490 else
4491 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4492 }
4493 }
4494}
4495
4496
4497/**
4498 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4499 *
4500 * @param pPool The pool.
4501 * @param pPage The page.
4502 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4503 */
4504DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4505{
4506 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4507 {
4508 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4509 if (pShwPDPT->a[i].n.u1Present)
4510 {
4511 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4512 if (pSubPage)
4513 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4514 else
4515 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4516 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4517 }
4518 }
4519}
4520
4521
4522/**
4523 * Clear references to shadowed pages in a 64-bit level 4 page table.
4524 *
4525 * @param pPool The pool.
4526 * @param pPage The page.
4527 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4528 */
4529DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4530{
4531 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4532 {
4533 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4534 if (pShwPML4->a[i].n.u1Present)
4535 {
4536 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4537 if (pSubPage)
4538 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4539 else
4540 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4541 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4542 }
4543 }
4544}
4545
4546
4547/**
4548 * Clear references to shadowed pages in an EPT page directory.
4549 *
4550 * @param pPool The pool.
4551 * @param pPage The page.
4552 * @param pShwPD The shadow page directory (mapping of the page).
4553 */
4554DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4555{
4556 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4557 {
4558 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4559 if (pShwPD->a[i].n.u1Present)
4560 {
4561#ifdef PGM_WITH_LARGE_PAGES
4562 if (pShwPD->a[i].b.u1Size)
4563 {
4564 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4565 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4566 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */, i);
4567 }
4568 else
4569#endif
4570 {
4571 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4572 if (pSubPage)
4573 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4574 else
4575 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4576 }
4577 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4578 }
4579 }
4580}
4581
4582
4583/**
4584 * Clear references to shadowed pages in an EPT page directory pointer table.
4585 *
4586 * @param pPool The pool.
4587 * @param pPage The page.
4588 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4589 */
4590DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4591{
4592 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4593 {
4594 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4595 if (pShwPDPT->a[i].n.u1Present)
4596 {
4597 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4598 if (pSubPage)
4599 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4600 else
4601 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4602 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4603 }
4604 }
4605}
4606
4607
4608/**
4609 * Clears all references made by this page.
4610 *
4611 * This includes other shadow pages and GC physical addresses.
4612 *
4613 * @param pPool The pool.
4614 * @param pPage The page.
4615 */
4616static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4617{
4618 /*
4619 * Map the shadow page and take action according to the page kind.
4620 */
4621 PVM pVM = pPool->CTX_SUFF(pVM);
4622 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4623 switch (pPage->enmKind)
4624 {
4625 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4626 {
4627 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4628 void *pvGst;
4629 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4630 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4631 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4632 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4633 break;
4634 }
4635
4636 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4637 {
4638 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4639 void *pvGst;
4640 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4641 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4642 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4643 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4644 break;
4645 }
4646
4647 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4648 {
4649 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4650 void *pvGst;
4651 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4652 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4653 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4654 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4655 break;
4656 }
4657
4658 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4659 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4660 {
4661 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4662 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4663 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4664 break;
4665 }
4666
4667 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4668 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4669 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4670 {
4671 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4672 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4673 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4674 break;
4675 }
4676
4677 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4678 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4679 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4680 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4681 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4682 case PGMPOOLKIND_PAE_PD_PHYS:
4683 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4684 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4685 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4686 break;
4687
4688 case PGMPOOLKIND_32BIT_PD_PHYS:
4689 case PGMPOOLKIND_32BIT_PD:
4690 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4691 break;
4692
4693 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4694 case PGMPOOLKIND_PAE_PDPT:
4695 case PGMPOOLKIND_PAE_PDPT_PHYS:
4696 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4697 break;
4698
4699 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4700 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4701 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4702 break;
4703
4704 case PGMPOOLKIND_64BIT_PML4:
4705 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4706 break;
4707
4708 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4709 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4710 break;
4711
4712 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4713 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4714 break;
4715
4716 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4717 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4718 break;
4719
4720 default:
4721 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4722 }
4723
4724 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
4725 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4726 ASMMemZeroPage(pvShw);
4727 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4728 pPage->fZeroed = true;
4729 Assert(!pPage->cPresent);
4730 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4731}
4732
4733/**
4734 * Flushes a pool page.
4735 *
4736 * This moves the page to the free list after removing all user references to it.
4737 *
4738 * @returns VBox status code.
4739 * @retval VINF_SUCCESS on success.
4740 * @param pPool The pool.
4741 * @param HCPhys The HC physical address of the shadow page.
4742 * @param fFlush Flush the TLBS when required (should only be false in very specific use cases!!)
4743 */
4744int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4745{
4746 PVM pVM = pPool->CTX_SUFF(pVM);
4747 bool fFlushRequired = false;
4748
4749 int rc = VINF_SUCCESS;
4750 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4751 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4752 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4753
4754 /*
4755 * Quietly reject any attempts at flushing any of the special root pages.
4756 */
4757 if (pPage->idx < PGMPOOL_IDX_FIRST)
4758 {
4759 AssertFailed(); /* can no longer happen */
4760 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4761 return VINF_SUCCESS;
4762 }
4763
4764 pgmLock(pVM);
4765
4766 /*
4767 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4768 */
4769 if (pgmPoolIsPageLocked(pPage))
4770 {
4771 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4772 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4773 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4774 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4775 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4776 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4777 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4778 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4779 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4780 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4781 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4782 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4783 pgmUnlock(pVM);
4784 return VINF_SUCCESS;
4785 }
4786
4787#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4788 /* Start a subset so we won't run out of mapping space. */
4789 PVMCPU pVCpu = VMMGetCpu(pVM);
4790 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4791#endif
4792
4793 /*
4794 * Mark the page as being in need of an ASMMemZeroPage().
4795 */
4796 pPage->fZeroed = false;
4797
4798#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4799 if (pPage->fDirty)
4800 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4801#endif
4802
4803 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4804 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4805 fFlushRequired = true;
4806
4807 /*
4808 * Clear the page.
4809 */
4810 pgmPoolTrackClearPageUsers(pPool, pPage);
4811 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4812 pgmPoolTrackDeref(pPool, pPage);
4813 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4814
4815 /*
4816 * Flush it from the cache.
4817 */
4818 pgmPoolCacheFlushPage(pPool, pPage);
4819
4820#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4821 /* Heavy stuff done. */
4822 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4823#endif
4824
4825 /*
4826 * Deregistering the monitoring.
4827 */
4828 if (pPage->fMonitored)
4829 rc = pgmPoolMonitorFlush(pPool, pPage);
4830
4831 /*
4832 * Free the page.
4833 */
4834 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4835 pPage->iNext = pPool->iFreeHead;
4836 pPool->iFreeHead = pPage->idx;
4837 pPage->enmKind = PGMPOOLKIND_FREE;
4838 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4839 pPage->GCPhys = NIL_RTGCPHYS;
4840 pPage->fReusedFlushPending = false;
4841
4842 pPool->cUsedPages--;
4843
4844 /* Flush the TLBs of all VCPUs if required. */
4845 if ( fFlushRequired
4846 && fFlush)
4847 {
4848 PGM_INVL_ALL_VCPU_TLBS(pVM);
4849 }
4850
4851 pgmUnlock(pVM);
4852 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4853 return rc;
4854}
4855
4856
4857/**
4858 * Frees a usage of a pool page.
4859 *
4860 * The caller is responsible to updating the user table so that it no longer
4861 * references the shadow page.
4862 *
4863 * @param pPool The pool.
4864 * @param HCPhys The HC physical address of the shadow page.
4865 * @param iUser The shadow page pool index of the user table.
4866 * @param iUserTable The index into the user table (shadowed).
4867 */
4868void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4869{
4870 PVM pVM = pPool->CTX_SUFF(pVM);
4871
4872 STAM_PROFILE_START(&pPool->StatFree, a);
4873 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4874 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4875 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4876 pgmLock(pVM);
4877 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4878 if (!pPage->fCached)
4879 pgmPoolFlushPage(pPool, pPage);
4880 pgmUnlock(pVM);
4881 STAM_PROFILE_STOP(&pPool->StatFree, a);
4882}
4883
4884
4885/**
4886 * Makes one or more free page free.
4887 *
4888 * @returns VBox status code.
4889 * @retval VINF_SUCCESS on success.
4890 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4891 *
4892 * @param pPool The pool.
4893 * @param enmKind Page table kind
4894 * @param iUser The user of the page.
4895 */
4896static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4897{
4898 PVM pVM = pPool->CTX_SUFF(pVM);
4899
4900 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%d\n", iUser));
4901
4902 /*
4903 * If the pool isn't full grown yet, expand it.
4904 */
4905 if ( pPool->cCurPages < pPool->cMaxPages
4906#if defined(IN_RC)
4907 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4908 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4909 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4910#endif
4911 )
4912 {
4913 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4914#ifdef IN_RING3
4915 int rc = PGMR3PoolGrow(pVM);
4916#else
4917 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4918#endif
4919 if (RT_FAILURE(rc))
4920 return rc;
4921 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4922 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4923 return VINF_SUCCESS;
4924 }
4925
4926 /*
4927 * Free one cached page.
4928 */
4929 return pgmPoolCacheFreeOne(pPool, iUser);
4930}
4931
4932/**
4933 * Allocates a page from the pool.
4934 *
4935 * This page may actually be a cached page and not in need of any processing
4936 * on the callers part.
4937 *
4938 * @returns VBox status code.
4939 * @retval VINF_SUCCESS if a NEW page was allocated.
4940 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4941 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4942 * @param pVM The VM handle.
4943 * @param GCPhys The GC physical address of the page we're gonna shadow.
4944 * For 4MB and 2MB PD entries, it's the first address the
4945 * shadow PT is covering.
4946 * @param enmKind The kind of mapping.
4947 * @param enmAccess Access type for the mapping (only relevant for big pages)
4948 * @param iUser The shadow page pool index of the user table.
4949 * @param iUserTable The index into the user table (shadowed).
4950 * @param fLockPage Lock the page
4951 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4952 */
4953int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable,
4954 bool fLockPage, PPPGMPOOLPAGE ppPage)
4955{
4956 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4957 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4958 LogFlow(("pgmPoolAllocEx: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4959 *ppPage = NULL;
4960 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4961 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4962 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4963
4964 pgmLock(pVM);
4965
4966 if (pPool->fCacheEnabled)
4967 {
4968 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4969 if (RT_SUCCESS(rc2))
4970 {
4971 if (fLockPage)
4972 pgmPoolLockPage(pPool, *ppPage);
4973 pgmUnlock(pVM);
4974 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4975 LogFlow(("pgmPoolAllocEx: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4976 return rc2;
4977 }
4978 }
4979
4980 /*
4981 * Allocate a new one.
4982 */
4983 int rc = VINF_SUCCESS;
4984 uint16_t iNew = pPool->iFreeHead;
4985 if (iNew == NIL_PGMPOOL_IDX)
4986 {
4987 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4988 if (RT_FAILURE(rc))
4989 {
4990 pgmUnlock(pVM);
4991 Log(("pgmPoolAllocEx: returns %Rrc (Free)\n", rc));
4992 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4993 return rc;
4994 }
4995 iNew = pPool->iFreeHead;
4996 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4997 }
4998
4999 /* unlink the free head */
5000 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5001 pPool->iFreeHead = pPage->iNext;
5002 pPage->iNext = NIL_PGMPOOL_IDX;
5003
5004 /*
5005 * Initialize it.
5006 */
5007 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5008 pPage->enmKind = enmKind;
5009 pPage->enmAccess = enmAccess;
5010 pPage->GCPhys = GCPhys;
5011 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5012 pPage->fMonitored = false;
5013 pPage->fCached = false;
5014#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5015 pPage->fDirty = false;
5016#endif
5017 pPage->fReusedFlushPending = false;
5018 pPage->cModifications = 0;
5019 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5020 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5021 pPage->cLocked = 0;
5022 pPage->cPresent = 0;
5023 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5024 pPage->pvLastAccessHandlerFault = 0;
5025 pPage->cLastAccessHandlerCount = 0;
5026 pPage->pvLastAccessHandlerRip = 0;
5027
5028 /*
5029 * Insert into the tracking and cache. If this fails, free the page.
5030 */
5031 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5032 if (RT_FAILURE(rc3))
5033 {
5034 pPool->cUsedPages--;
5035 pPage->enmKind = PGMPOOLKIND_FREE;
5036 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5037 pPage->GCPhys = NIL_RTGCPHYS;
5038 pPage->iNext = pPool->iFreeHead;
5039 pPool->iFreeHead = pPage->idx;
5040 pgmUnlock(pVM);
5041 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5042 Log(("pgmPoolAllocEx: returns %Rrc (Insert)\n", rc3));
5043 return rc3;
5044 }
5045
5046 /*
5047 * Commit the allocation, clear the page and return.
5048 */
5049#ifdef VBOX_WITH_STATISTICS
5050 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5051 pPool->cUsedPagesHigh = pPool->cUsedPages;
5052#endif
5053
5054 if (!pPage->fZeroed)
5055 {
5056 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5057 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5058 ASMMemZeroPage(pv);
5059 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5060 }
5061
5062 *ppPage = pPage;
5063 if (fLockPage)
5064 pgmPoolLockPage(pPool, pPage);
5065 pgmUnlock(pVM);
5066 LogFlow(("pgmPoolAllocEx: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5067 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5068 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5069 return rc;
5070}
5071
5072
5073/**
5074 * Frees a usage of a pool page.
5075 *
5076 * @param pVM The VM handle.
5077 * @param HCPhys The HC physical address of the shadow page.
5078 * @param iUser The shadow page pool index of the user table.
5079 * @param iUserTable The index into the user table (shadowed).
5080 */
5081void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5082{
5083 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5084 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5085 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5086}
5087
5088/**
5089 * Internal worker for finding a 'in-use' shadow page give by it's physical address.
5090 *
5091 * @returns Pointer to the shadow page structure.
5092 * @param pPool The pool.
5093 * @param HCPhys The HC physical address of the shadow page.
5094 */
5095PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5096{
5097 PVM pVM = pPool->CTX_SUFF(pVM);
5098 PGM_LOCK_ASSERT_OWNER(pVM);
5099
5100 /*
5101 * Look up the page.
5102 */
5103 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5104
5105 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5106 return pPage;
5107}
5108
5109
5110/**
5111 * Internal worker for finding a page for debugging purposes, no assertions.
5112 *
5113 * @returns Pointer to the shadow page structure. NULL on if not found.
5114 * @param pPool The pool.
5115 * @param HCPhys The HC physical address of the shadow page.
5116 */
5117PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5118{
5119 PVM pVM = pPool->CTX_SUFF(pVM);
5120 PGM_LOCK_ASSERT_OWNER(pVM);
5121 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5122}
5123
5124
#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
/**
 * Flush the specified page if present
 *
 * Walks the GCPhys hash chain for the page.  The first pool page found that
 * lies within the same guest page and is of a monitored kind gets its
 * monitoring chain flushed, after which the function returns.  Pool pages of
 * kinds without monitoring are skipped.  An unknown kind is a fatal error.
 *
 * Must be called on an EMT (asserted).
 *
 * @param   pVM     The VM handle.
 * @param   GCPhys  Guest physical address of the page to flush
 */
void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
{
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);

    VM_ASSERT_EMT(pVM);

    /*
     * Look up the (page aligned) GCPhys in the hash and walk the chain.
     */
    GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
    unsigned iCur = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    while (iCur != NIL_PGMPOOL_IDX)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[iCur];

        /* Unsigned subtraction: true only when pPage->GCPhys lies in [GCPhys, GCPhys + PAGE_SIZE). */
        if (pPage->GCPhys - GCPhys < PAGE_SIZE)
        {
            switch (pPage->enmKind)
            {
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4:
                case PGMPOOLKIND_32BIT_PD:
                case PGMPOOLKIND_PAE_PDPT:
                {
                    Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
                    if (pPage->fDirty)
                        STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
                    else
#endif
                        STAM_COUNTER_INC(&pPool->StatForceFlushPage);
                    Assert(!pgmPoolIsPageLocked(pPage));
                    pgmPoolMonitorChainFlush(pPool, pPage);
                    return;
                }

                /* ignore, no monitoring. */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                case PGMPOOLKIND_ROOT_NESTED:
                case PGMPOOLKIND_PAE_PD_PHYS:
                case PGMPOOLKIND_PAE_PDPT_PHYS:
                case PGMPOOLKIND_32BIT_PD_PHYS:
                case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
                    break;

                default:
                    AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
            }
        }

        /* Advance to the next page in the hash chain. */
        iCur = pPage->iNext;
    }
}
#endif /* IN_RING3 */
5208
5209#ifdef IN_RING3
5210
5211
5212/**
5213 * Reset CPU on hot plugging.
5214 *
5215 * @param pVM The VM handle.
5216 * @param pVCpu The virtual CPU.
5217 */
5218void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5219{
5220 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5221
5222 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5223 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5224 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5225}
5226
5227
5228/**
5229 * Flushes the entire cache.
5230 *
5231 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5232 * this and execute this CR3 flush.
5233 *
5234 * @param pPool The pool.
5235 */
5236void pgmR3PoolReset(PVM pVM)
5237{
5238 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5239
5240 PGM_LOCK_ASSERT_OWNER(pVM);
5241 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5242 LogFlow(("pgmR3PoolReset:\n"));
5243
5244 /*
5245 * If there are no pages in the pool, there is nothing to do.
5246 */
5247 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5248 {
5249 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5250 return;
5251 }
5252
5253 /*
5254 * Exit the shadow mode since we're going to clear everything,
5255 * including the root page.
5256 */
5257 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5258 {
5259 PVMCPU pVCpu = &pVM->aCpus[i];
5260 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5261 }
5262
5263 /*
5264 * Nuke the free list and reinsert all pages into it.
5265 */
5266 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5267 {
5268 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5269
5270 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5271 if (pPage->fMonitored)
5272 pgmPoolMonitorFlush(pPool, pPage);
5273 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5274 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5275 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5276 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5277 pPage->cModifications = 0;
5278 pPage->GCPhys = NIL_RTGCPHYS;
5279 pPage->enmKind = PGMPOOLKIND_FREE;
5280 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5281 Assert(pPage->idx == i);
5282 pPage->iNext = i + 1;
5283 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5284 pPage->fSeenNonGlobal = false;
5285 pPage->fMonitored = false;
5286#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5287 pPage->fDirty = false;
5288#endif
5289 pPage->fCached = false;
5290 pPage->fReusedFlushPending = false;
5291 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5292 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5293 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5294 pPage->cLocked = 0;
5295 }
5296 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5297 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5298 pPool->cUsedPages = 0;
5299
5300 /*
5301 * Zap and reinitialize the user records.
5302 */
5303 pPool->cPresent = 0;
5304 pPool->iUserFreeHead = 0;
5305 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5306 const unsigned cMaxUsers = pPool->cMaxUsers;
5307 for (unsigned i = 0; i < cMaxUsers; i++)
5308 {
5309 paUsers[i].iNext = i + 1;
5310 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5311 paUsers[i].iUserTable = 0xfffffffe;
5312 }
5313 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5314
5315 /*
5316 * Clear all the GCPhys links and rebuild the phys ext free list.
5317 */
5318 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5319 pRam;
5320 pRam = pRam->CTX_SUFF(pNext))
5321 {
5322 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5323 while (iPage-- > 0)
5324 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5325 }
5326
5327 pPool->iPhysExtFreeHead = 0;
5328 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5329 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5330 for (unsigned i = 0; i < cMaxPhysExts; i++)
5331 {
5332 paPhysExts[i].iNext = i + 1;
5333 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5334 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5335 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5336 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5337 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5338 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5339 }
5340 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5341
5342 /*
5343 * Just zap the modified list.
5344 */
5345 pPool->cModifiedPages = 0;
5346 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5347
5348 /*
5349 * Clear the GCPhys hash and the age list.
5350 */
5351 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5352 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5353 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5354 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5355
5356#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5357 /* Clear all dirty pages. */
5358 pPool->idxFreeDirtyPage = 0;
5359 pPool->cDirtyPages = 0;
5360 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5361 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5362#endif
5363
5364 /*
5365 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5366 */
5367 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5368 {
5369 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5370 pPage->iNext = NIL_PGMPOOL_IDX;
5371 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5372 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5373 pPage->cModifications = 0;
5374 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5375 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5376 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5377 if (pPage->fMonitored)
5378 {
5379 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
5380 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5381 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5382 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5383 pPool->pszAccessHandler);
5384 AssertFatalRCSuccess(rc);
5385 pgmPoolHashInsert(pPool, pPage);
5386 }
5387 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5388 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5389 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5390 }
5391
5392 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5393 {
5394 /*
5395 * Re-enter the shadowing mode and assert Sync CR3 FF.
5396 */
5397 PVMCPU pVCpu = &pVM->aCpus[i];
5398 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5399 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5400 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5401 }
5402
5403 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5404}
5405#endif /* IN_RING3 */
5406
#ifdef LOG_ENABLED
/**
 * Translates a pool page kind into the name of its enumerator, for logging.
 *
 * @returns Read-only string: the enumerator name, or "Unknown kind!" for a
 *          value not handled below.
 * @param   enmKind     The page kind (PGMPOOLKIND_XXX value).
 */
static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
{
    /* Each case returns the exact spelling of the enumerator it matches. */
#define PGMPOOL_KIND_CASE_STR(a_Kind)   case a_Kind: return #a_Kind
    switch (enmKind)
    {
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_INVALID);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_FREE);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_32BIT_PT_FOR_PHYS);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PT_FOR_PHYS);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PT_FOR_32BIT_PT);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PT_FOR_PAE_PT);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PT_FOR_PAE_2MB);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_32BIT_PD);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_32BIT_PD_PHYS);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PD_PHYS);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PDPT_FOR_32BIT);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PDPT);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_PAE_PDPT_PHYS);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_64BIT_PDPT_FOR_PHYS);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_64BIT_PD_FOR_PHYS);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_64BIT_PML4);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_EPT_PDPT_FOR_PHYS);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_EPT_PD_FOR_PHYS);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_EPT_PT_FOR_PHYS);
        PGMPOOL_KIND_CASE_STR(PGMPOOLKIND_ROOT_NESTED);
    }
#undef PGMPOOL_KIND_CASE_STR
    return "Unknown kind!";
}
#endif /* LOG_ENABLED*/
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette