VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 56034

Last change on this file since 56034 was 56017, checked in by vboxsync, 10 years ago

Physical access handler cleanups.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 212.7 KB
Line 
1/* $Id: PGMAllPool.cpp 56017 2015-05-21 18:14:21Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hm_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
54static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
55#endif
56#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
57static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
58#endif
59
60int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
61PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
62void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
63void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
64
65RT_C_DECLS_END
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
88/**
89 * Flushes a chain of pages sharing the same access monitor.
90 *
91 * @returns VBox status code suitable for scheduling.
92 * @param pPool The pool.
93 * @param pPage A page in the chain.
94 * @todo VBOXSTRICTRC
95 */
96int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
97{
98 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
99
100 /*
101 * Find the list head.
102 */
103 uint16_t idx = pPage->idx;
104 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
105 {
106 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 idx = pPage->iMonitoredPrev;
109 Assert(idx != pPage->idx);
110 pPage = &pPool->aPages[idx];
111 }
112 }
113
114 /*
115 * Iterate the list flushing each shadow page.
116 */
117 int rc = VINF_SUCCESS;
118 for (;;)
119 {
120 idx = pPage->iMonitoredNext;
121 Assert(idx != pPage->idx);
122 if (pPage->idx >= PGMPOOL_IDX_FIRST)
123 {
124 int rc2 = pgmPoolFlushPage(pPool, pPage);
125 AssertRC(rc2);
126 }
127 /* next */
128 if (idx == NIL_PGMPOOL_IDX)
129 break;
130 pPage = &pPool->aPages[idx];
131 }
132 return rc;
133}
134
135
136/**
137 * Wrapper for getting the current context pointer to the entry being modified.
138 *
139 * @returns VBox status code suitable for scheduling.
140 * @param pVM Pointer to the VM.
141 * @param pvDst Destination address
142 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
143 * on the context (e.g. \#PF in R0 & RC).
144 * @param GCPhysSrc The source guest physical address.
145 * @param cb Size of data to read
146 */
147DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
148{
149#if defined(IN_RING3)
150 NOREF(pVM); NOREF(GCPhysSrc);
151 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
152 return VINF_SUCCESS;
153#else
154 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
155 NOREF(pvSrc);
156 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
157#endif
158}
159
160
161/**
162 * Process shadow entries before they are changed by the guest.
163 *
164 * For PT entries we will clear them. For PD entries, we'll simply check
165 * for mapping conflicts and set the SyncCR3 FF if found.
166 *
167 * @param pVCpu Pointer to the VMCPU.
168 * @param pPool The pool.
169 * @param pPage The head page.
170 * @param GCPhysFault The guest physical fault address.
171 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
172 * depending on the context (e.g. \#PF in R0 & RC).
173 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
174 */
175static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
176 void const *pvAddress, unsigned cbWrite)
177{
178 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
179 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
180 PVM pVM = pPool->CTX_SUFF(pVM);
181 NOREF(pVCpu);
182
183 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
184
185 for (;;)
186 {
187 union
188 {
189 void *pv;
190 PX86PT pPT;
191 PPGMSHWPTPAE pPTPae;
192 PX86PD pPD;
193 PX86PDPAE pPDPae;
194 PX86PDPT pPDPT;
195 PX86PML4 pPML4;
196 } uShw;
197
198 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
199
200 uShw.pv = NULL;
201 switch (pPage->enmKind)
202 {
203 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
204 {
205 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
206 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
207 const unsigned iShw = off / sizeof(X86PTE);
208 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
209 if (uShw.pPT->a[iShw].n.u1Present)
210 {
211 X86PTE GstPte;
212
213 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
214 AssertRC(rc);
215 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
216 pgmPoolTracDerefGCPhysHint(pPool, pPage,
217 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
218 GstPte.u & X86_PTE_PG_MASK,
219 iShw);
220 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
221 }
222 break;
223 }
224
225 /* page/2 sized */
226 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
227 {
228 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
229 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
230 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
231 {
232 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
233 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
234 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
235 {
236 X86PTE GstPte;
237 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
238 AssertRC(rc);
239
240 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
241 pgmPoolTracDerefGCPhysHint(pPool, pPage,
242 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
243 GstPte.u & X86_PTE_PG_MASK,
244 iShw);
245 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
246 }
247 }
248 break;
249 }
250
251 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
252 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
253 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
254 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
255 {
256 unsigned iGst = off / sizeof(X86PDE);
257 unsigned iShwPdpt = iGst / 256;
258 unsigned iShw = (iGst % 256) * 2;
259 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
260
261 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
262 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
263 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
264 {
265 for (unsigned i = 0; i < 2; i++)
266 {
267# ifdef VBOX_WITH_RAW_MODE_NOT_R0
268 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
269 {
270 Assert(pgmMapAreMappingsEnabled(pVM));
271 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
272 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
273 break;
274 }
275# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
276 if (uShw.pPDPae->a[iShw+i].n.u1Present)
277 {
278 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
279 pgmPoolFree(pVM,
280 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
281 pPage->idx,
282 iShw + i);
283 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
284 }
285
286 /* paranoia / a bit assumptive. */
287 if ( (off & 3)
288 && (off & 3) + cbWrite > 4)
289 {
290 const unsigned iShw2 = iShw + 2 + i;
291 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
292 {
293# ifdef VBOX_WITH_RAW_MODE_NOT_R0
294 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
295 {
296 Assert(pgmMapAreMappingsEnabled(pVM));
297 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
298 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
299 break;
300 }
301# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
302 if (uShw.pPDPae->a[iShw2].n.u1Present)
303 {
304 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
305 pgmPoolFree(pVM,
306 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
307 pPage->idx,
308 iShw2);
309 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
310 }
311 }
312 }
313 }
314 }
315 break;
316 }
317
318 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
319 {
320 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
321 const unsigned iShw = off / sizeof(X86PTEPAE);
322 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
323 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
324 {
325 X86PTEPAE GstPte;
326 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
327 AssertRC(rc);
328
329 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
330 pgmPoolTracDerefGCPhysHint(pPool, pPage,
331 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
332 GstPte.u & X86_PTE_PAE_PG_MASK,
333 iShw);
334 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
335 }
336
337 /* paranoia / a bit assumptive. */
338 if ( (off & 7)
339 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
340 {
341 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
342 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
343
344 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
345 {
346 X86PTEPAE GstPte;
347 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
348 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
349 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
350 AssertRC(rc);
351 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
352 pgmPoolTracDerefGCPhysHint(pPool, pPage,
353 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
354 GstPte.u & X86_PTE_PAE_PG_MASK,
355 iShw2);
356 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
357 }
358 }
359 break;
360 }
361
362 case PGMPOOLKIND_32BIT_PD:
363 {
364 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
365 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
366
367 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
368 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
369# ifdef VBOX_WITH_RAW_MODE_NOT_R0
370 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
371 {
372 Assert(pgmMapAreMappingsEnabled(pVM));
373 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
374 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
375 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
376 break;
377 }
378 else
379# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
380 {
381 if (uShw.pPD->a[iShw].n.u1Present)
382 {
383 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
384 pgmPoolFree(pVM,
385 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
386 pPage->idx,
387 iShw);
388 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
389 }
390 }
391 /* paranoia / a bit assumptive. */
392 if ( (off & 3)
393 && (off & 3) + cbWrite > sizeof(X86PTE))
394 {
395 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
396 if ( iShw2 != iShw
397 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
398 {
399# ifdef VBOX_WITH_RAW_MODE_NOT_R0
400 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
401 {
402 Assert(pgmMapAreMappingsEnabled(pVM));
403 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
404 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
405 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
406 break;
407 }
408# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
409 if (uShw.pPD->a[iShw2].n.u1Present)
410 {
411 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
412 pgmPoolFree(pVM,
413 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
414 pPage->idx,
415 iShw2);
416 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
417 }
418 }
419 }
420#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
421 if ( uShw.pPD->a[iShw].n.u1Present
422 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
423 {
424 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
425# ifdef IN_RC /* TLB load - we're pushing things a bit... */
426 ASMProbeReadByte(pvAddress);
427# endif
428 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
429 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
430 }
431#endif
432 break;
433 }
434
435 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
436 {
437 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
438 const unsigned iShw = off / sizeof(X86PDEPAE);
439 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
440#ifdef VBOX_WITH_RAW_MODE_NOT_R0
441 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
442 {
443 Assert(pgmMapAreMappingsEnabled(pVM));
444 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
445 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
446 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
447 break;
448 }
449#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
450 /*
451 * Causes trouble when the guest uses a PDE to refer to the whole page table level
452 * structure. (Invalidate here; faults later on when it tries to change the page
453 * table entries -> recheck; probably only applies to the RC case.)
454 */
455#ifdef VBOX_WITH_RAW_MODE_NOT_R0
456 else
457#endif
458 {
459 if (uShw.pPDPae->a[iShw].n.u1Present)
460 {
461 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
462 pgmPoolFree(pVM,
463 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
464 pPage->idx,
465 iShw);
466 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
467 }
468 }
469 /* paranoia / a bit assumptive. */
470 if ( (off & 7)
471 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
472 {
473 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
474 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
475
476#ifdef VBOX_WITH_RAW_MODE_NOT_R0
477 if ( iShw2 != iShw
478 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
479 {
480 Assert(pgmMapAreMappingsEnabled(pVM));
481 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
482 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
483 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
484 break;
485 }
486 else
487#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
488 if (uShw.pPDPae->a[iShw2].n.u1Present)
489 {
490 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
491 pgmPoolFree(pVM,
492 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
493 pPage->idx,
494 iShw2);
495 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
496 }
497 }
498 break;
499 }
500
501 case PGMPOOLKIND_PAE_PDPT:
502 {
503 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
504 /*
505 * Hopefully this doesn't happen very often:
506 * - touching unused parts of the page
507 * - messing with the bits of pd pointers without changing the physical address
508 */
509 /* PDPT roots are not page aligned; 32 byte only! */
510 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
511
512 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
513 const unsigned iShw = offPdpt / sizeof(X86PDPE);
514 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
515 {
516# ifdef VBOX_WITH_RAW_MODE_NOT_R0
517 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
518 {
519 Assert(pgmMapAreMappingsEnabled(pVM));
520 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
521 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
522 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
523 break;
524 }
525 else
526# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
527 if (uShw.pPDPT->a[iShw].n.u1Present)
528 {
529 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
530 pgmPoolFree(pVM,
531 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
532 pPage->idx,
533 iShw);
534 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
535 }
536
537 /* paranoia / a bit assumptive. */
538 if ( (offPdpt & 7)
539 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
540 {
541 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
542 if ( iShw2 != iShw
543 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
544 {
545# ifdef VBOX_WITH_RAW_MODE_NOT_R0
546 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
547 {
548 Assert(pgmMapAreMappingsEnabled(pVM));
549 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
550 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
551 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
552 break;
553 }
554 else
555# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
556 if (uShw.pPDPT->a[iShw2].n.u1Present)
557 {
558 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
559 pgmPoolFree(pVM,
560 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
561 pPage->idx,
562 iShw2);
563 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
564 }
565 }
566 }
567 }
568 break;
569 }
570
571#ifndef IN_RC
572 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
573 {
574 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
575 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
576 const unsigned iShw = off / sizeof(X86PDEPAE);
577 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
578 if (uShw.pPDPae->a[iShw].n.u1Present)
579 {
580 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
581 pgmPoolFree(pVM,
582 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
583 pPage->idx,
584 iShw);
585 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
586 }
587 /* paranoia / a bit assumptive. */
588 if ( (off & 7)
589 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
590 {
591 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
592 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
593
594 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
595 if (uShw.pPDPae->a[iShw2].n.u1Present)
596 {
597 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
598 pgmPoolFree(pVM,
599 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
600 pPage->idx,
601 iShw2);
602 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
603 }
604 }
605 break;
606 }
607
608 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
609 {
610 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
611 /*
612 * Hopefully this doesn't happen very often:
613 * - messing with the bits of pd pointers without changing the physical address
614 */
615 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
616 const unsigned iShw = off / sizeof(X86PDPE);
617 if (uShw.pPDPT->a[iShw].n.u1Present)
618 {
619 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
620 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
621 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
622 }
623 /* paranoia / a bit assumptive. */
624 if ( (off & 7)
625 && (off & 7) + cbWrite > sizeof(X86PDPE))
626 {
627 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
628 if (uShw.pPDPT->a[iShw2].n.u1Present)
629 {
630 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
631 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
632 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
633 }
634 }
635 break;
636 }
637
638 case PGMPOOLKIND_64BIT_PML4:
639 {
640 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
641 /*
642 * Hopefully this doesn't happen very often:
643 * - messing with the bits of pd pointers without changing the physical address
644 */
645 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
646 const unsigned iShw = off / sizeof(X86PDPE);
647 if (uShw.pPML4->a[iShw].n.u1Present)
648 {
649 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
650 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
651 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
652 }
653 /* paranoia / a bit assumptive. */
654 if ( (off & 7)
655 && (off & 7) + cbWrite > sizeof(X86PDPE))
656 {
657 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
658 if (uShw.pPML4->a[iShw2].n.u1Present)
659 {
660 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
661 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
662 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
663 }
664 }
665 break;
666 }
667#endif /* IN_RING0 */
668
669 default:
670 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
671 }
672 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
673
674 /* next */
675 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
676 return;
677 pPage = &pPool->aPages[pPage->iMonitoredNext];
678 }
679}
680
681# ifndef IN_RING3
682
683/**
684 * Checks if a access could be a fork operation in progress.
685 *
686 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
687 *
688 * @returns true if it's likely that we're forking, otherwise false.
689 * @param pPool The pool.
690 * @param pDis The disassembled instruction.
691 * @param offFault The access offset.
692 */
693DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
694{
695 /*
696 * i386 linux is using btr to clear X86_PTE_RW.
697 * The functions involved are (2.6.16 source inspection):
698 * clear_bit
699 * ptep_set_wrprotect
700 * copy_one_pte
701 * copy_pte_range
702 * copy_pmd_range
703 * copy_pud_range
704 * copy_page_range
705 * dup_mmap
706 * dup_mm
707 * copy_mm
708 * copy_process
709 * do_fork
710 */
711 if ( pDis->pCurInstr->uOpcode == OP_BTR
712 && !(offFault & 4)
713 /** @todo Validate that the bit index is X86_PTE_RW. */
714 )
715 {
716 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
717 return true;
718 }
719 return false;
720}
721
722
723/**
724 * Determine whether the page is likely to have been reused.
725 *
726 * @returns true if we consider the page as being reused for a different purpose.
727 * @returns false if we consider it to still be a paging page.
728 * @param pVM Pointer to the VM.
729 * @param pVCpu Pointer to the VMCPU.
730 * @param pRegFrame Trap register frame.
731 * @param pDis The disassembly info for the faulting instruction.
732 * @param pvFault The fault address.
733 *
734 * @remark The REP prefix check is left to the caller because of STOSD/W.
735 */
736DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
737{
738#ifndef IN_RC
739 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
740 if ( HMHasPendingIrq(pVM)
741 && (pRegFrame->rsp - pvFault) < 32)
742 {
743 /* Fault caused by stack writes while trying to inject an interrupt event. */
744 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
745 return true;
746 }
747#else
748 NOREF(pVM); NOREF(pvFault);
749#endif
750
751 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
752
753 /* Non-supervisor mode write means it's used for something else. */
754 if (CPUMGetGuestCPL(pVCpu) == 3)
755 return true;
756
757 switch (pDis->pCurInstr->uOpcode)
758 {
759 /* call implies the actual push of the return address faulted */
760 case OP_CALL:
761 Log4(("pgmPoolMonitorIsReused: CALL\n"));
762 return true;
763 case OP_PUSH:
764 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
765 return true;
766 case OP_PUSHF:
767 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
768 return true;
769 case OP_PUSHA:
770 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
771 return true;
772 case OP_FXSAVE:
773 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
774 return true;
775 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
776 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
777 return true;
778 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
779 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
780 return true;
781 case OP_MOVSWD:
782 case OP_STOSWD:
783 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
784 && pRegFrame->rcx >= 0x40
785 )
786 {
787 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
788
789 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
790 return true;
791 }
792 return false;
793 }
794 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
795 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
796 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
797 {
798 Log4(("pgmPoolMonitorIsReused: ESP\n"));
799 return true;
800 }
801
802 return false;
803}
804
805
806/**
807 * Flushes the page being accessed.
808 *
809 * @returns VBox status code suitable for scheduling.
810 * @param pVM Pointer to the VM.
811 * @param pVCpu Pointer to the VMCPU.
812 * @param pPool The pool.
813 * @param pPage The pool page (head).
814 * @param pDis The disassembly of the write instruction.
815 * @param pRegFrame The trap register frame.
816 * @param GCPhysFault The fault address as guest physical address.
817 * @param pvFault The fault address.
818 * @todo VBOXSTRICTRC
819 */
820static int pgmPoolAccessPfHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
821 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
822{
823 NOREF(pVM); NOREF(GCPhysFault);
824
825 /*
826 * First, do the flushing.
827 */
828 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
829
830 /*
831 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
832 * Must do this in raw mode (!); XP boot will fail otherwise.
833 */
834 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
835 if (rc2 == VINF_SUCCESS)
836 { /* do nothing */ }
837#ifdef VBOX_WITH_IEM
838 else if (rc2 == VINF_EM_RESCHEDULE)
839 {
840 if (rc == VINF_SUCCESS)
841 rc = VBOXSTRICTRC_VAL(rc2);
842# ifndef IN_RING3
843 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
844# endif
845 }
846#endif
847 else if (rc2 == VERR_EM_INTERPRETER)
848 {
849#ifdef IN_RC
850 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
851 {
852 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
853 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->eip));
854 rc = VINF_SUCCESS;
855 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
856 }
857 else
858#endif
859 {
860 rc = VINF_EM_RAW_EMULATE_INSTR;
861 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
862 }
863 }
864 else if (RT_FAILURE_NP(rc2))
865 rc = VBOXSTRICTRC_VAL(rc2);
866 else
867 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
868
869 LogFlow(("pgmPoolAccessPfHandlerPT: returns %Rrc (flushed)\n", rc));
870 return rc;
871}
872
873
874/**
875 * Handles the STOSD write accesses.
876 *
877 * @returns VBox status code suitable for scheduling.
878 * @param pVM Pointer to the VM.
879 * @param pPool The pool.
880 * @param pPage The pool page (head).
881 * @param pDis The disassembly of the write instruction.
882 * @param pRegFrame The trap register frame.
883 * @param GCPhysFault The fault address as guest physical address.
884 * @param pvFault The fault address.
885 */
886DECLINLINE(int) pgmPoolAccessPfHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
887 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
888{
889 unsigned uIncrement = pDis->Param1.cb;
890 NOREF(pVM);
891
892 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
893 Assert(pRegFrame->rcx <= 0x20);
894
895#ifdef VBOX_STRICT
896 if (pDis->uOpMode == DISCPUMODE_32BIT)
897 Assert(uIncrement == 4);
898 else
899 Assert(uIncrement == 8);
900#endif
901
902 Log3(("pgmPoolAccessPfHandlerSTOSD\n"));
903
904 /*
905 * Increment the modification counter and insert it into the list
906 * of modified pages the first time.
907 */
908 if (!pPage->cModifications++)
909 pgmPoolMonitorModifiedInsert(pPool, pPage);
910
911 /*
912 * Execute REP STOSD.
913 *
914 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
915 * write situation, meaning that it's safe to write here.
916 */
917 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
918 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
919 while (pRegFrame->rcx)
920 {
921#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
922 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
923 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
924 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
925#else
926 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
927#endif
928#ifdef IN_RC
929 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
930#else
931 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
932#endif
933 pu32 += uIncrement;
934 GCPhysFault += uIncrement;
935 pRegFrame->rdi += uIncrement;
936 pRegFrame->rcx--;
937 }
938 pRegFrame->rip += pDis->cbInstr;
939
940 LogFlow(("pgmPoolAccessPfHandlerSTOSD: returns\n"));
941 return VINF_SUCCESS;
942}
943
944
945/**
946 * Handles the simple write accesses.
947 *
948 * @returns VBox status code suitable for scheduling.
949 * @param pVM Pointer to the VM.
950 * @param pVCpu Pointer to the VMCPU.
951 * @param pPool The pool.
952 * @param pPage The pool page (head).
953 * @param pDis The disassembly of the write instruction.
954 * @param pRegFrame The trap register frame.
955 * @param GCPhysFault The fault address as guest physical address.
956 * @param pvFault The fault address.
957 * @param pfReused Reused state (in/out)
958 */
959DECLINLINE(int) pgmPoolAccessPfHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
960 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
961{
962 Log3(("pgmPoolAccessPfHandlerSimple\n"));
963 NOREF(pVM);
964 NOREF(pfReused); /* initialized by caller */
965
966 /*
967 * Increment the modification counter and insert it into the list
968 * of modified pages the first time.
969 */
970 if (!pPage->cModifications++)
971 pgmPoolMonitorModifiedInsert(pPool, pPage);
972
973 /*
974 * Clear all the pages. ASSUMES that pvFault is readable.
975 */
976#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
977 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
978#endif
979
980 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
981 if (cbWrite <= 8)
982 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
983 else
984 {
985 Assert(cbWrite <= 16);
986 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
987 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
988 }
989
990#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
991 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
992#endif
993
994 /*
995 * Interpret the instruction.
996 */
997 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
998 if (RT_SUCCESS(rc))
999 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1000 else if (rc == VERR_EM_INTERPRETER)
1001 {
1002 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1003 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1004 rc = VINF_EM_RAW_EMULATE_INSTR;
1005 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1006 }
1007
1008#if 0 /* experimental code */
1009 if (rc == VINF_SUCCESS)
1010 {
1011 switch (pPage->enmKind)
1012 {
1013 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1014 {
1015 X86PTEPAE GstPte;
1016 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1017 AssertRC(rc);
1018
1019 /* Check the new value written by the guest. If present and with a bogus physical address, then
1020 * it's fairly safe to assume the guest is reusing the PT.
1021 */
1022 if (GstPte.n.u1Present)
1023 {
1024 RTHCPHYS HCPhys = -1;
1025 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1026 if (rc != VINF_SUCCESS)
1027 {
1028 *pfReused = true;
1029 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1030 }
1031 }
1032 break;
1033 }
1034 }
1035 }
1036#endif
1037
1038 LogFlow(("pgmPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1039 return VBOXSTRICTRC_VAL(rc);
1040}
1041
1042
1043/**
1044 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1045 * \#PF access handler callback for page table pages.}
1046 *
1047 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
1048 */
1049DECLEXPORT(VBOXSTRICTRC) pgmPoolAccessPfHandler(PVM pVM, PVMCPU pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
1050 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1051{
1052 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1053 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1054 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1055 unsigned cMaxModifications;
1056 bool fForcedFlush = false;
1057 NOREF(uErrorCode);
1058
1059 LogFlow(("pgmPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1060
1061 pgmLock(pVM);
1062 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1063 {
1064 /* Pool page changed while we were waiting for the lock; ignore. */
1065 Log(("CPU%d: pgmPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1066 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1067 pgmUnlock(pVM);
1068 return VINF_SUCCESS;
1069 }
1070#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1071 if (pPage->fDirty)
1072 {
1073 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1074 pgmUnlock(pVM);
1075 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1076 }
1077#endif
1078
1079#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1080 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1081 {
1082 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1083 void *pvGst;
1084 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1085 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1086 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1087 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1088 }
1089#endif
1090
1091 /*
1092 * Disassemble the faulting instruction.
1093 */
1094 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1095 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1096 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1097 {
1098 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1099 pgmUnlock(pVM);
1100 return rc;
1101 }
1102
1103 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1104
1105 /*
1106 * We should ALWAYS have the list head as user parameter. This
1107 * is because we use that page to record the changes.
1108 */
1109 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1110
1111#ifdef IN_RING0
1112 /* Maximum nr of modifications depends on the page type. */
1113 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1114 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1115 cMaxModifications = 4;
1116 else
1117 cMaxModifications = 24;
1118#else
1119 cMaxModifications = 48;
1120#endif
1121
1122 /*
1123 * Incremental page table updates should weigh more than random ones.
1124 * (Only applies when started from offset 0)
1125 */
1126 pVCpu->pgm.s.cPoolAccessHandler++;
1127 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1128 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1129 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1130 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1131 {
1132 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1133 Assert(pPage->cModifications < 32000);
1134 pPage->cModifications = pPage->cModifications * 2;
1135 pPage->GCPtrLastAccessHandlerFault = pvFault;
1136 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1137 if (pPage->cModifications >= cMaxModifications)
1138 {
1139 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1140 fForcedFlush = true;
1141 }
1142 }
1143
1144 if (pPage->cModifications >= cMaxModifications)
1145 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1146
1147 /*
1148 * Check if it's worth dealing with.
1149 */
1150 bool fReused = false;
1151 bool fNotReusedNotForking = false;
1152 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1153 || pgmPoolIsPageLocked(pPage)
1154 )
1155 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1156 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1157 {
1158 /*
1159 * Simple instructions, no REP prefix.
1160 */
1161 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1162 {
1163 rc = pgmPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1164 if (fReused)
1165 goto flushPage;
1166
1167 /* A mov instruction to change the first page table entry will be remembered so we can detect
1168 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1169 */
1170 if ( rc == VINF_SUCCESS
1171 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1172 && pDis->pCurInstr->uOpcode == OP_MOV
1173 && (pvFault & PAGE_OFFSET_MASK) == 0)
1174 {
1175 pPage->GCPtrLastAccessHandlerFault = pvFault;
1176 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1177 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1178 /* Make sure we don't kick out a page too quickly. */
1179 if (pPage->cModifications > 8)
1180 pPage->cModifications = 2;
1181 }
1182 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1183 {
1184 /* ignore the 2nd write to this page table entry. */
1185 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1186 }
1187 else
1188 {
1189 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1190 pPage->GCPtrLastAccessHandlerRip = 0;
1191 }
1192
1193 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1194 pgmUnlock(pVM);
1195 return rc;
1196 }
1197
1198 /*
1199 * Windows is frequently doing small memset() operations (netio test 4k+).
1200 * We have to deal with these or we'll kill the cache and performance.
1201 */
1202 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1203 && !pRegFrame->eflags.Bits.u1DF
1204 && pDis->uOpMode == pDis->uCpuMode
1205 && pDis->uAddrMode == pDis->uCpuMode)
1206 {
1207 bool fValidStosd = false;
1208
1209 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1210 && pDis->fPrefix == DISPREFIX_REP
1211 && pRegFrame->ecx <= 0x20
1212 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1213 && !((uintptr_t)pvFault & 3)
1214 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1215 )
1216 {
1217 fValidStosd = true;
1218 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1219 }
1220 else
1221 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1222 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1223 && pRegFrame->rcx <= 0x20
1224 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1225 && !((uintptr_t)pvFault & 7)
1226 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1227 )
1228 {
1229 fValidStosd = true;
1230 }
1231
1232 if (fValidStosd)
1233 {
1234 rc = pgmPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1235 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1236 pgmUnlock(pVM);
1237 return rc;
1238 }
1239 }
1240
1241 /* REP prefix, don't bother. */
1242 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1243 Log4(("pgmPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1244 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1245 fNotReusedNotForking = true;
1246 }
1247
1248#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1249 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1250 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1251 */
1252 if ( pPage->cModifications >= cMaxModifications
1253 && !fForcedFlush
1254 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1255 && ( fNotReusedNotForking
1256 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1257 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1258 )
1259 )
1260 {
1261 Assert(!pgmPoolIsPageLocked(pPage));
1262 Assert(pPage->fDirty == false);
1263
1264 /* Flush any monitored duplicates as we will disable write protection. */
1265 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1266 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1267 {
1268 PPGMPOOLPAGE pPageHead = pPage;
1269
1270 /* Find the monitor head. */
1271 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1272 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1273
1274 while (pPageHead)
1275 {
1276 unsigned idxNext = pPageHead->iMonitoredNext;
1277
1278 if (pPageHead != pPage)
1279 {
1280 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1281 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1282 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1283 AssertRC(rc2);
1284 }
1285
1286 if (idxNext == NIL_PGMPOOL_IDX)
1287 break;
1288
1289 pPageHead = &pPool->aPages[idxNext];
1290 }
1291 }
1292
1293 /* The flushing above might fail for locked pages, so double check. */
1294 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1295 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1296 {
1297 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1298
1299 /* Temporarily allow write access to the page table again. */
1300 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1301 if (rc == VINF_SUCCESS)
1302 {
1303 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1304 AssertMsg(rc == VINF_SUCCESS
1305 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1306 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1307 || rc == VERR_PAGE_NOT_PRESENT,
1308 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1309# ifdef VBOX_STRICT
1310 pPage->GCPtrDirtyFault = pvFault;
1311# endif
1312
1313 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1314 pgmUnlock(pVM);
1315 return rc;
1316 }
1317 }
1318 }
1319#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1320
1321 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1322flushPage:
1323 /*
1324 * Not worth it, so flush it.
1325 *
1326 * If we considered it to be reused, don't go back to ring-3
1327 * to emulate failed instructions since we usually cannot
1328 * interpret then. This may be a bit risky, in which case
1329 * the reuse detection must be fixed.
1330 */
1331 rc = pgmPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1332 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1333 && fReused)
1334 {
1335 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1336 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1337 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1338 }
1339 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1340 pgmUnlock(pVM);
1341 return rc;
1342}
1343
1344# endif /* !IN_RING3 */
1345
1346/**
1347 * @callback_method_impl{FNPGMPHYSHANDLER,
1348 * Access handler for shadowed page table pages.}
1349 */
1350PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1351pgmPoolAccessHandler(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1352 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1353{
1354 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1355 STAM_PROFILE_START(&pPool->StatMonitorR3, a);
1356 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1357 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1358 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1359
1360 NOREF(pvBuf); NOREF(enmAccessType);
1361
1362 /*
1363 * Make sure the pool page wasn't modified by a different CPU.
1364 */
1365 pgmLock(pVM);
1366 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1367 {
1368 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1369
1370 /* The max modification count before flushing depends on the context and page type. */
1371#ifdef IN_RING3
1372 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1373#else
1374 uint16_t cMaxModifications;
1375 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1376 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1377 cMaxModifications = 4;
1378 else
1379 cMaxModifications = 24;
1380# ifdef IN_RC
1381 cMaxModifications *= 2; /* traps are cheaper than exists. */
1382# endif
1383#endif
1384
1385 /*
1386 * We don't have to be very sophisticated about this since there are relativly few calls here.
1387 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1388 */
1389 if ( ( pPage->cModifications < cMaxModifications
1390 || pgmPoolIsPageLocked(pPage) )
1391 && enmOrigin != PGMACCESSORIGIN_DEVICE
1392 && cbBuf <= 16)
1393 {
1394 /* Clear the shadow entry. */
1395 if (!pPage->cModifications++)
1396 pgmPoolMonitorModifiedInsert(pPool, pPage);
1397
1398 if (cbBuf <= 8)
1399 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1400 else
1401 {
1402 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1403 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1404 }
1405 }
1406 else
1407 {
1408 /* ASSUME that VERR_PGM_POOL_CLEARED can be ignored here and that FFs will deal with it in due time. */
1409 pgmPoolMonitorChainFlush(pPool, pPage);
1410 }
1411
1412 STAM_PROFILE_STOP_EX(&pPool->StatMonitorR3, &pPool->StatMonitorR3FlushPage, a);
1413 }
1414 else
1415 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1416 pgmUnlock(pVM);
1417 return VINF_PGM_HANDLER_DO_DEFAULT;
1418}
1419
1420
1421# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1422
1423# if defined(VBOX_STRICT) && !defined(IN_RING3)
1424
1425/**
1426 * Check references to guest physical memory in a PAE / PAE page table.
1427 *
1428 * @param pPool The pool.
1429 * @param pPage The page.
1430 * @param pShwPT The shadow page table (mapping of the page).
1431 * @param pGstPT The guest page table.
1432 */
1433static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1434{
1435 unsigned cErrors = 0;
1436 int LastRc = -1; /* initialized to shut up gcc */
1437 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1438 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1439 PVM pVM = pPool->CTX_SUFF(pVM);
1440
1441#ifdef VBOX_STRICT
1442 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1443 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1444#endif
1445 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1446 {
1447 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1448 {
1449 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1450 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1451 if ( rc != VINF_SUCCESS
1452 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1453 {
1454 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1455 LastPTE = i;
1456 LastRc = rc;
1457 LastHCPhys = HCPhys;
1458 cErrors++;
1459
1460 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1461 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1462 AssertRC(rc);
1463
1464 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1465 {
1466 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1467
1468 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1469 {
1470 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1471
1472 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1473 {
1474 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1475 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1476 {
1477 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1478 }
1479 }
1480
1481 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1482 }
1483 }
1484 }
1485 }
1486 }
1487 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1488}
1489
1490
1491/**
1492 * Check references to guest physical memory in a PAE / 32-bit page table.
1493 *
1494 * @param pPool The pool.
1495 * @param pPage The page.
1496 * @param pShwPT The shadow page table (mapping of the page).
1497 * @param pGstPT The guest page table.
1498 */
1499static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1500{
1501 unsigned cErrors = 0;
1502 int LastRc = -1; /* initialized to shut up gcc */
1503 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1504 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1505 PVM pVM = pPool->CTX_SUFF(pVM);
1506
1507#ifdef VBOX_STRICT
1508 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1509 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1510#endif
1511 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1512 {
1513 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1514 {
1515 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1516 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1517 if ( rc != VINF_SUCCESS
1518 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1519 {
1520 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1521 LastPTE = i;
1522 LastRc = rc;
1523 LastHCPhys = HCPhys;
1524 cErrors++;
1525
1526 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1527 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1528 AssertRC(rc);
1529
1530 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1531 {
1532 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1533
1534 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1535 {
1536 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1537
1538 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1539 {
1540 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1541 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1542 {
1543 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1544 }
1545 }
1546
1547 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1548 }
1549 }
1550 }
1551 }
1552 }
1553 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1554}
1555
1556# endif /* VBOX_STRICT && !IN_RING3 */
1557
1558/**
1559 * Clear references to guest physical memory in a PAE / PAE page table.
1560 *
1561 * @returns nr of changed PTEs
1562 * @param pPool The pool.
1563 * @param pPage The page.
1564 * @param pShwPT The shadow page table (mapping of the page).
1565 * @param pGstPT The guest page table.
1566 * @param pOldGstPT The old cached guest page table.
1567 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1568 * @param pfFlush Flush reused page table (out)
1569 */
1570DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1571 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1572{
1573 unsigned cChanged = 0;
1574
1575#ifdef VBOX_STRICT
1576 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1577 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1578#endif
1579 *pfFlush = false;
1580
1581 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1582 {
1583 /* Check the new value written by the guest. If present and with a bogus physical address, then
1584 * it's fairly safe to assume the guest is reusing the PT.
1585 */
1586 if ( fAllowRemoval
1587 && pGstPT->a[i].n.u1Present)
1588 {
1589 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1590 {
1591 *pfFlush = true;
1592 return ++cChanged;
1593 }
1594 }
1595 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1596 {
1597 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1598 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1599 {
1600#ifdef VBOX_STRICT
1601 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1602 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1603 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1604#endif
1605 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1606 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1607 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1608 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1609
1610 if ( uHostAttr == uGuestAttr
1611 && fHostRW <= fGuestRW)
1612 continue;
1613 }
1614 cChanged++;
1615 /* Something was changed, so flush it. */
1616 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1617 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1618 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1619 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1620 }
1621 }
1622 return cChanged;
1623}
1624
1625
1626/**
1627 * Clear references to guest physical memory in a PAE / PAE page table.
1628 *
1629 * @returns nr of changed PTEs
1630 * @param pPool The pool.
1631 * @param pPage The page.
1632 * @param pShwPT The shadow page table (mapping of the page).
1633 * @param pGstPT The guest page table.
1634 * @param pOldGstPT The old cached guest page table.
1635 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1636 * @param pfFlush Flush reused page table (out)
1637 */
1638DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1639 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1640{
1641 unsigned cChanged = 0;
1642
1643#ifdef VBOX_STRICT
1644 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1645 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1646#endif
1647 *pfFlush = false;
1648
1649 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1650 {
1651 /* Check the new value written by the guest. If present and with a bogus physical address, then
1652 * it's fairly safe to assume the guest is reusing the PT.
1653 */
1654 if ( fAllowRemoval
1655 && pGstPT->a[i].n.u1Present)
1656 {
1657 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1658 {
1659 *pfFlush = true;
1660 return ++cChanged;
1661 }
1662 }
1663 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1664 {
1665 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1666 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1667 {
1668#ifdef VBOX_STRICT
1669 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1670 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1671 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1672#endif
1673 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1674 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1675 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1676 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1677
1678 if ( uHostAttr == uGuestAttr
1679 && fHostRW <= fGuestRW)
1680 continue;
1681 }
1682 cChanged++;
1683 /* Something was changed, so flush it. */
1684 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1685 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1686 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1687 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1688 }
1689 }
1690 return cChanged;
1691}
1692
1693
1694/**
1695 * Flush a dirty page
1696 *
1697 * @param pVM Pointer to the VM.
1698 * @param pPool The pool.
1699 * @param idxSlot Dirty array slot index
1700 * @param fAllowRemoval Allow a reused page table to be removed
1701 */
1702static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1703{
1704 PPGMPOOLPAGE pPage;
1705 unsigned idxPage;
1706
1707 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1708 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1709 return;
1710
1711 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1712 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1713 pPage = &pPool->aPages[idxPage];
1714 Assert(pPage->idx == idxPage);
1715 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1716
1717 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1718 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1719
1720#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1721 PVMCPU pVCpu = VMMGetCpu(pVM);
1722 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1723#endif
1724
1725 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1726 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1727 Assert(rc == VINF_SUCCESS);
1728 pPage->fDirty = false;
1729
1730#ifdef VBOX_STRICT
1731 uint64_t fFlags = 0;
1732 RTHCPHYS HCPhys;
1733 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1734 AssertMsg( ( rc == VINF_SUCCESS
1735 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1736 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1737 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1738 || rc == VERR_PAGE_NOT_PRESENT,
1739 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1740#endif
1741
1742 /* Flush those PTEs that have changed. */
1743 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1744 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1745 void *pvGst;
1746 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1747 bool fFlush;
1748 unsigned cChanges;
1749
1750 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1751 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1752 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1753 else
1754 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1755 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1756
1757 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1758 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1759 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1760 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1761
1762 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1763 Assert(pPage->cModifications);
1764 if (cChanges < 4)
1765 pPage->cModifications = 1; /* must use > 0 here */
1766 else
1767 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1768
1769 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1770 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1771 pPool->idxFreeDirtyPage = idxSlot;
1772
1773 pPool->cDirtyPages--;
1774 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1775 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1776 if (fFlush)
1777 {
1778 Assert(fAllowRemoval);
1779 Log(("Flush reused page table!\n"));
1780 pgmPoolFlushPage(pPool, pPage);
1781 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1782 }
1783 else
1784 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1785
1786#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1787 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1788#endif
1789}
1790
1791
1792# ifndef IN_RING3
1793/**
1794 * Add a new dirty page
1795 *
1796 * @param pVM Pointer to the VM.
1797 * @param pPool The pool.
1798 * @param pPage The page.
1799 */
1800void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1801{
1802 unsigned idxFree;
1803
1804 PGM_LOCK_ASSERT_OWNER(pVM);
1805 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1806 Assert(!pPage->fDirty);
1807
1808 idxFree = pPool->idxFreeDirtyPage;
1809 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1810 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1811
1812 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1813 {
1814 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1815 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1816 }
1817 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1818 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1819
1820 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1821
1822 /*
1823 * Make a copy of the guest page table as we require valid GCPhys addresses
1824 * when removing references to physical pages.
1825 * (The HCPhys linear lookup is *extremely* expensive!)
1826 */
1827 void *pvGst;
1828 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1829 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1830# ifdef VBOX_STRICT
1831 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1832 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1833 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1834 else
1835 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1836 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1837# endif
1838 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1839
1840 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1841 pPage->fDirty = true;
1842 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1843 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1844 pPool->cDirtyPages++;
1845
1846 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1847 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1848 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1849 {
1850 unsigned i;
1851 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1852 {
1853 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1854 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1855 {
1856 pPool->idxFreeDirtyPage = idxFree;
1857 break;
1858 }
1859 }
1860 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1861 }
1862
1863 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1864
1865 /*
1866 * Clear all references to this shadow table. See @bugref{7298}.
1867 */
1868 pgmPoolTrackClearPageUsers(pPool, pPage);
1869}
1870# endif /* !IN_RING3 */
1871
1872
1873/**
1874 * Check if the specified page is dirty (not write monitored)
1875 *
1876 * @return dirty or not
1877 * @param pVM Pointer to the VM.
1878 * @param GCPhys Guest physical address
1879 */
1880bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1881{
1882 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1883 PGM_LOCK_ASSERT_OWNER(pVM);
1884 if (!pPool->cDirtyPages)
1885 return false;
1886
1887 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1888
1889 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1890 {
1891 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1892 {
1893 PPGMPOOLPAGE pPage;
1894 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1895
1896 pPage = &pPool->aPages[idxPage];
1897 if (pPage->GCPhys == GCPhys)
1898 return true;
1899 }
1900 }
1901 return false;
1902}
1903
1904
1905/**
1906 * Reset all dirty pages by reinstating page monitoring.
1907 *
1908 * @param pVM Pointer to the VM.
1909 */
1910void pgmPoolResetDirtyPages(PVM pVM)
1911{
1912 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1913 PGM_LOCK_ASSERT_OWNER(pVM);
1914 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1915
1916 if (!pPool->cDirtyPages)
1917 return;
1918
1919 Log(("pgmPoolResetDirtyPages\n"));
1920 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1921 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1922
1923 pPool->idxFreeDirtyPage = 0;
1924 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1925 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1926 {
1927 unsigned i;
1928 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1929 {
1930 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1931 {
1932 pPool->idxFreeDirtyPage = i;
1933 break;
1934 }
1935 }
1936 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1937 }
1938
1939 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1940 return;
1941}
1942
1943
1944/**
1945 * Invalidate the PT entry for the specified page
1946 *
1947 * @param pVM Pointer to the VM.
1948 * @param GCPtrPage Guest page to invalidate
1949 */
1950void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1951{
1952 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1953 PGM_LOCK_ASSERT_OWNER(pVM);
1954 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1955
1956 if (!pPool->cDirtyPages)
1957 return;
1958
1959 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
1960 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1961 {
1962 }
1963}
1964
1965
1966/**
1967 * Reset all dirty pages by reinstating page monitoring.
1968 *
1969 * @param pVM Pointer to the VM.
1970 * @param GCPhysPT Physical address of the page table
1971 */
1972void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1973{
1974 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1975 PGM_LOCK_ASSERT_OWNER(pVM);
1976 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1977 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1978
1979 if (!pPool->cDirtyPages)
1980 return;
1981
1982 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1983
1984 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1985 {
1986 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1987 {
1988 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1989
1990 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1991 if (pPage->GCPhys == GCPhysPT)
1992 {
1993 idxDirtyPage = i;
1994 break;
1995 }
1996 }
1997 }
1998
1999 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2000 {
2001 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2002 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2003 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
2004 {
2005 unsigned i;
2006 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2007 {
2008 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2009 {
2010 pPool->idxFreeDirtyPage = i;
2011 break;
2012 }
2013 }
2014 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2015 }
2016 }
2017}
2018
2019# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2020
2021/**
2022 * Inserts a page into the GCPhys hash table.
2023 *
2024 * @param pPool The pool.
2025 * @param pPage The page.
2026 */
2027DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2028{
2029 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2030 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2031 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2032 pPage->iNext = pPool->aiHash[iHash];
2033 pPool->aiHash[iHash] = pPage->idx;
2034}
2035
2036
2037/**
2038 * Removes a page from the GCPhys hash table.
2039 *
2040 * @param pPool The pool.
2041 * @param pPage The page.
2042 */
2043DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2044{
2045 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2046 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2047 if (pPool->aiHash[iHash] == pPage->idx)
2048 pPool->aiHash[iHash] = pPage->iNext;
2049 else
2050 {
2051 uint16_t iPrev = pPool->aiHash[iHash];
2052 for (;;)
2053 {
2054 const int16_t i = pPool->aPages[iPrev].iNext;
2055 if (i == pPage->idx)
2056 {
2057 pPool->aPages[iPrev].iNext = pPage->iNext;
2058 break;
2059 }
2060 if (i == NIL_PGMPOOL_IDX)
2061 {
2062 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2063 break;
2064 }
2065 iPrev = i;
2066 }
2067 }
2068 pPage->iNext = NIL_PGMPOOL_IDX;
2069}
2070
2071
2072/**
2073 * Frees up one cache page.
2074 *
2075 * @returns VBox status code.
2076 * @retval VINF_SUCCESS on success.
2077 * @param pPool The pool.
2078 * @param iUser The user index.
2079 */
2080static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2081{
2082#ifndef IN_RC
2083 const PVM pVM = pPool->CTX_SUFF(pVM);
2084#endif
2085 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
2086 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2087
2088 /*
2089 * Select one page from the tail of the age list.
2090 */
2091 PPGMPOOLPAGE pPage;
2092 for (unsigned iLoop = 0; ; iLoop++)
2093 {
2094 uint16_t iToFree = pPool->iAgeTail;
2095 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2096 iToFree = pPool->aPages[iToFree].iAgePrev;
2097/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2098 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2099 {
2100 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2101 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2102 {
2103 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2104 continue;
2105 iToFree = i;
2106 break;
2107 }
2108 }
2109*/
2110 Assert(iToFree != iUser);
2111 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2112 pPage = &pPool->aPages[iToFree];
2113
2114 /*
2115 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2116 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2117 */
2118 if ( !pgmPoolIsPageLocked(pPage)
2119 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2120 break;
2121 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2122 pgmPoolCacheUsed(pPool, pPage);
2123 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2124 }
2125
2126 /*
2127 * Found a usable page, flush it and return.
2128 */
2129 int rc = pgmPoolFlushPage(pPool, pPage);
2130 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2131 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2132 if (rc == VINF_SUCCESS)
2133 PGM_INVL_ALL_VCPU_TLBS(pVM);
2134 return rc;
2135}
2136
2137
2138/**
2139 * Checks if a kind mismatch is really a page being reused
2140 * or if it's just normal remappings.
2141 *
2142 * @returns true if reused and the cached page (enmKind1) should be flushed
2143 * @returns false if not reused.
2144 * @param enmKind1 The kind of the cached page.
2145 * @param enmKind2 The kind of the requested page.
2146 */
2147static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2148{
2149 switch (enmKind1)
2150 {
2151 /*
2152 * Never reuse them. There is no remapping in non-paging mode.
2153 */
2154 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2155 case PGMPOOLKIND_32BIT_PD_PHYS:
2156 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2157 case PGMPOOLKIND_PAE_PD_PHYS:
2158 case PGMPOOLKIND_PAE_PDPT_PHYS:
2159 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2160 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2161 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2162 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2163 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2164 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2165 return false;
2166
2167 /*
2168 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2169 */
2170 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2171 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2172 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2173 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2174 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2175 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2176 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2177 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2178 case PGMPOOLKIND_32BIT_PD:
2179 case PGMPOOLKIND_PAE_PDPT:
2180 switch (enmKind2)
2181 {
2182 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2183 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2184 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2185 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2186 case PGMPOOLKIND_64BIT_PML4:
2187 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2188 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2189 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2190 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2191 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2192 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2193 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2194 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2195 return true;
2196 default:
2197 return false;
2198 }
2199
2200 /*
2201 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2202 */
2203 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2204 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2205 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2206 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2207 case PGMPOOLKIND_64BIT_PML4:
2208 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2209 switch (enmKind2)
2210 {
2211 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2212 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2213 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2214 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2215 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2216 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2217 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2218 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2219 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2220 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2221 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2222 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2223 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2224 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2225 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2226 return true;
2227 default:
2228 return false;
2229 }
2230
2231 /*
2232 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2233 */
2234 case PGMPOOLKIND_ROOT_NESTED:
2235 return false;
2236
2237 default:
2238 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2239 }
2240}
2241
2242
2243/**
2244 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2245 *
2246 * @returns VBox status code.
2247 * @retval VINF_PGM_CACHED_PAGE on success.
2248 * @retval VERR_FILE_NOT_FOUND if not found.
2249 * @param pPool The pool.
2250 * @param GCPhys The GC physical address of the page we're gonna shadow.
2251 * @param enmKind The kind of mapping.
2252 * @param enmAccess Access type for the mapping (only relevant for big pages)
2253 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2254 * @param iUser The shadow page pool index of the user table. This is
2255 * NIL_PGMPOOL_IDX for root pages.
2256 * @param iUserTable The index into the user table (shadowed). Ignored if
2257 * root page
2258 * @param ppPage Where to store the pointer to the page.
2259 */
2260static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2261 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2262{
2263 /*
2264 * Look up the GCPhys in the hash.
2265 */
2266 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2267 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2268 if (i != NIL_PGMPOOL_IDX)
2269 {
2270 do
2271 {
2272 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2273 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2274 if (pPage->GCPhys == GCPhys)
2275 {
2276 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2277 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2278 && pPage->fA20Enabled == fA20Enabled)
2279 {
2280 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2281 * doesn't flush it in case there are no more free use records.
2282 */
2283 pgmPoolCacheUsed(pPool, pPage);
2284
2285 int rc = VINF_SUCCESS;
2286 if (iUser != NIL_PGMPOOL_IDX)
2287 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2288 if (RT_SUCCESS(rc))
2289 {
2290 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2291 *ppPage = pPage;
2292 if (pPage->cModifications)
2293 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2294 STAM_COUNTER_INC(&pPool->StatCacheHits);
2295 return VINF_PGM_CACHED_PAGE;
2296 }
2297 return rc;
2298 }
2299
2300 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2301 {
2302 /*
2303 * The kind is different. In some cases we should now flush the page
2304 * as it has been reused, but in most cases this is normal remapping
2305 * of PDs as PT or big pages using the GCPhys field in a slightly
2306 * different way than the other kinds.
2307 */
2308 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2309 {
2310 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2311 pgmPoolFlushPage(pPool, pPage);
2312 break;
2313 }
2314 }
2315 }
2316
2317 /* next */
2318 i = pPage->iNext;
2319 } while (i != NIL_PGMPOOL_IDX);
2320 }
2321
2322 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2323 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2324 return VERR_FILE_NOT_FOUND;
2325}
2326
2327
2328/**
2329 * Inserts a page into the cache.
2330 *
2331 * @param pPool The pool.
2332 * @param pPage The cached page.
2333 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2334 */
2335static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2336{
2337 /*
2338 * Insert into the GCPhys hash if the page is fit for that.
2339 */
2340 Assert(!pPage->fCached);
2341 if (fCanBeCached)
2342 {
2343 pPage->fCached = true;
2344 pgmPoolHashInsert(pPool, pPage);
2345 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2346 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2347 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2348 }
2349 else
2350 {
2351 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2352 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2353 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2354 }
2355
2356 /*
2357 * Insert at the head of the age list.
2358 */
2359 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2360 pPage->iAgeNext = pPool->iAgeHead;
2361 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2362 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2363 else
2364 pPool->iAgeTail = pPage->idx;
2365 pPool->iAgeHead = pPage->idx;
2366}
2367
2368
2369/**
2370 * Flushes a cached page.
2371 *
2372 * @param pPool The pool.
2373 * @param pPage The cached page.
2374 */
2375static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2376{
2377 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2378
2379 /*
2380 * Remove the page from the hash.
2381 */
2382 if (pPage->fCached)
2383 {
2384 pPage->fCached = false;
2385 pgmPoolHashRemove(pPool, pPage);
2386 }
2387 else
2388 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2389
2390 /*
2391 * Remove it from the age list.
2392 */
2393 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2394 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2395 else
2396 pPool->iAgeTail = pPage->iAgePrev;
2397 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2398 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2399 else
2400 pPool->iAgeHead = pPage->iAgeNext;
2401 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2402 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2403}
2404
2405
2406/**
2407 * Looks for pages sharing the monitor.
2408 *
2409 * @returns Pointer to the head page.
2410 * @returns NULL if not found.
2411 * @param pPool The Pool
2412 * @param pNewPage The page which is going to be monitored.
2413 */
2414static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2415{
2416 /*
2417 * Look up the GCPhys in the hash.
2418 */
2419 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2420 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2421 if (i == NIL_PGMPOOL_IDX)
2422 return NULL;
2423 do
2424 {
2425 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2426 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2427 && pPage != pNewPage)
2428 {
2429 switch (pPage->enmKind)
2430 {
2431 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2432 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2433 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2434 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2435 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2436 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2437 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2438 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2439 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2440 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2441 case PGMPOOLKIND_64BIT_PML4:
2442 case PGMPOOLKIND_32BIT_PD:
2443 case PGMPOOLKIND_PAE_PDPT:
2444 {
2445 /* find the head */
2446 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2447 {
2448 Assert(pPage->iMonitoredPrev != pPage->idx);
2449 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2450 }
2451 return pPage;
2452 }
2453
2454 /* ignore, no monitoring. */
2455 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2456 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2457 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2458 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2459 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2460 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2461 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2462 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2463 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2464 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2465 case PGMPOOLKIND_ROOT_NESTED:
2466 case PGMPOOLKIND_PAE_PD_PHYS:
2467 case PGMPOOLKIND_PAE_PDPT_PHYS:
2468 case PGMPOOLKIND_32BIT_PD_PHYS:
2469 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2470 break;
2471 default:
2472 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2473 }
2474 }
2475
2476 /* next */
2477 i = pPage->iNext;
2478 } while (i != NIL_PGMPOOL_IDX);
2479 return NULL;
2480}
2481
2482
2483/**
2484 * Enabled write monitoring of a guest page.
2485 *
2486 * @returns VBox status code.
2487 * @retval VINF_SUCCESS on success.
2488 * @param pPool The pool.
2489 * @param pPage The cached page.
2490 */
2491static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2492{
2493 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2494
2495 /*
2496 * Filter out the relevant kinds.
2497 */
2498 switch (pPage->enmKind)
2499 {
2500 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2501 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2502 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2503 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2504 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2505 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2506 case PGMPOOLKIND_64BIT_PML4:
2507 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2508 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2509 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2510 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2511 case PGMPOOLKIND_32BIT_PD:
2512 case PGMPOOLKIND_PAE_PDPT:
2513 break;
2514
2515 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2516 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2517 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2518 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2519 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2520 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2521 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2522 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2523 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2524 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2525 case PGMPOOLKIND_ROOT_NESTED:
2526 /* Nothing to monitor here. */
2527 return VINF_SUCCESS;
2528
2529 case PGMPOOLKIND_32BIT_PD_PHYS:
2530 case PGMPOOLKIND_PAE_PDPT_PHYS:
2531 case PGMPOOLKIND_PAE_PD_PHYS:
2532 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2533 /* Nothing to monitor here. */
2534 return VINF_SUCCESS;
2535 default:
2536 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2537 }
2538
2539 /*
2540 * Install handler.
2541 */
2542 int rc;
2543 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2544 if (pPageHead)
2545 {
2546 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2547 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2548
2549#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2550 if (pPageHead->fDirty)
2551 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2552#endif
2553
2554 pPage->iMonitoredPrev = pPageHead->idx;
2555 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2556 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2557 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2558 pPageHead->iMonitoredNext = pPage->idx;
2559 rc = VINF_SUCCESS;
2560 }
2561 else
2562 {
2563 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2564 PVM pVM = pPool->CTX_SUFF(pVM);
2565 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2566 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2567 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2568 NIL_RTR3PTR /*pszDesc*/);
2569 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2570 * the heap size should suffice. */
2571 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2572 PVMCPU pVCpu = VMMGetCpu(pVM);
2573 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2574 }
2575 pPage->fMonitored = true;
2576 return rc;
2577}
2578
2579
2580/**
2581 * Disables write monitoring of a guest page.
2582 *
2583 * @returns VBox status code.
2584 * @retval VINF_SUCCESS on success.
2585 * @param pPool The pool.
2586 * @param pPage The cached page.
2587 */
2588static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2589{
2590 /*
2591 * Filter out the relevant kinds.
2592 */
2593 switch (pPage->enmKind)
2594 {
2595 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2596 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2597 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2598 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2599 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2600 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2601 case PGMPOOLKIND_64BIT_PML4:
2602 case PGMPOOLKIND_32BIT_PD:
2603 case PGMPOOLKIND_PAE_PDPT:
2604 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2605 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2606 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2607 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2608 break;
2609
2610 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2611 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2612 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2613 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2614 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2615 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2616 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2617 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2618 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2619 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2620 case PGMPOOLKIND_ROOT_NESTED:
2621 case PGMPOOLKIND_PAE_PD_PHYS:
2622 case PGMPOOLKIND_PAE_PDPT_PHYS:
2623 case PGMPOOLKIND_32BIT_PD_PHYS:
2624 /* Nothing to monitor here. */
2625 Assert(!pPage->fMonitored);
2626 return VINF_SUCCESS;
2627
2628 default:
2629 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2630 }
2631 Assert(pPage->fMonitored);
2632
2633 /*
2634 * Remove the page from the monitored list or uninstall it if last.
2635 */
2636 const PVM pVM = pPool->CTX_SUFF(pVM);
2637 int rc;
2638 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2639 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2640 {
2641 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2642 {
2643 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2644 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2645 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, MMHyperCCToR3(pVM, pNewHead),
2646 MMHyperCCToR0(pVM, pNewHead), MMHyperCCToRC(pVM, pNewHead));
2647
2648 AssertFatalRCSuccess(rc);
2649 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2650 }
2651 else
2652 {
2653 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2654 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2655 {
2656 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2657 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2658 }
2659 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2660 rc = VINF_SUCCESS;
2661 }
2662 }
2663 else
2664 {
2665 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2666 AssertFatalRC(rc);
2667 PVMCPU pVCpu = VMMGetCpu(pVM);
2668 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2669 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2670 }
2671 pPage->fMonitored = false;
2672
2673 /*
2674 * Remove it from the list of modified pages (if in it).
2675 */
2676 pgmPoolMonitorModifiedRemove(pPool, pPage);
2677
2678 return rc;
2679}
2680
2681
2682/**
2683 * Inserts the page into the list of modified pages.
2684 *
2685 * @param pPool The pool.
2686 * @param pPage The page.
2687 */
2688void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2689{
2690 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2691 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2692 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2693 && pPool->iModifiedHead != pPage->idx,
2694 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2695 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2696 pPool->iModifiedHead, pPool->cModifiedPages));
2697
2698 pPage->iModifiedNext = pPool->iModifiedHead;
2699 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2700 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2701 pPool->iModifiedHead = pPage->idx;
2702 pPool->cModifiedPages++;
2703#ifdef VBOX_WITH_STATISTICS
2704 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2705 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2706#endif
2707}
2708
2709
2710/**
2711 * Removes the page from the list of modified pages and resets the
2712 * modification counter.
2713 *
2714 * @param pPool The pool.
2715 * @param pPage The page which is believed to be in the list of modified pages.
2716 */
2717static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2718{
2719 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2720 if (pPool->iModifiedHead == pPage->idx)
2721 {
2722 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2723 pPool->iModifiedHead = pPage->iModifiedNext;
2724 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2725 {
2726 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2727 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2728 }
2729 pPool->cModifiedPages--;
2730 }
2731 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2732 {
2733 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2734 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2735 {
2736 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2737 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2738 }
2739 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2740 pPool->cModifiedPages--;
2741 }
2742 else
2743 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2744 pPage->cModifications = 0;
2745}
2746
2747
2748/**
2749 * Zaps the list of modified pages, resetting their modification counters in the process.
2750 *
2751 * @param pVM Pointer to the VM.
2752 */
2753static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2754{
2755 pgmLock(pVM);
2756 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2757 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2758
2759 unsigned cPages = 0; NOREF(cPages);
2760
2761#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2762 pgmPoolResetDirtyPages(pVM);
2763#endif
2764
2765 uint16_t idx = pPool->iModifiedHead;
2766 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2767 while (idx != NIL_PGMPOOL_IDX)
2768 {
2769 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2770 idx = pPage->iModifiedNext;
2771 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2772 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2773 pPage->cModifications = 0;
2774 Assert(++cPages);
2775 }
2776 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2777 pPool->cModifiedPages = 0;
2778 pgmUnlock(pVM);
2779}
2780
2781
2782/**
2783 * Handle SyncCR3 pool tasks
2784 *
2785 * @returns VBox status code.
2786 * @retval VINF_SUCCESS if successfully added.
2787 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
2788 * @param pVCpu Pointer to the VMCPU.
2789 * @remark Should only be used when monitoring is available, thus placed in
2790 * the PGMPOOL_WITH_MONITORING #ifdef.
2791 */
2792int pgmPoolSyncCR3(PVMCPU pVCpu)
2793{
2794 PVM pVM = pVCpu->CTX_SUFF(pVM);
2795 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2796
2797 /*
2798 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2799 * Occasionally we will have to clear all the shadow page tables because we wanted
2800 * to monitor a page which was mapped by too many shadowed page tables. This operation
2801 * sometimes referred to as a 'lightweight flush'.
2802 */
2803# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2804 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2805 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2806# else /* !IN_RING3 */
2807 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2808 {
2809 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2810 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2811
2812 /* Make sure all other VCPUs return to ring 3. */
2813 if (pVM->cCpus > 1)
2814 {
2815 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2816 PGM_INVL_ALL_VCPU_TLBS(pVM);
2817 }
2818 return VINF_PGM_SYNC_CR3;
2819 }
2820# endif /* !IN_RING3 */
2821 else
2822 {
2823 pgmPoolMonitorModifiedClearAll(pVM);
2824
2825 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2826 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2827 {
2828 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2829 return pgmPoolSyncCR3(pVCpu);
2830 }
2831 }
2832 return VINF_SUCCESS;
2833}
2834
2835
2836/**
2837 * Frees up at least one user entry.
2838 *
2839 * @returns VBox status code.
2840 * @retval VINF_SUCCESS if successfully added.
2841 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2842 * @param pPool The pool.
2843 * @param iUser The user index.
2844 */
2845static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2846{
2847 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2848 /*
2849 * Just free cached pages in a braindead fashion.
2850 */
2851 /** @todo walk the age list backwards and free the first with usage. */
2852 int rc = VINF_SUCCESS;
2853 do
2854 {
2855 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2856 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2857 rc = rc2;
2858 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2859 return rc;
2860}
2861
2862
2863/**
2864 * Inserts a page into the cache.
2865 *
2866 * This will create user node for the page, insert it into the GCPhys
2867 * hash, and insert it into the age list.
2868 *
2869 * @returns VBox status code.
2870 * @retval VINF_SUCCESS if successfully added.
2871 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2872 * @param pPool The pool.
2873 * @param pPage The cached page.
2874 * @param GCPhys The GC physical address of the page we're gonna shadow.
2875 * @param iUser The user index.
2876 * @param iUserTable The user table index.
2877 */
2878DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2879{
2880 int rc = VINF_SUCCESS;
2881 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2882
2883 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2884
2885 if (iUser != NIL_PGMPOOL_IDX)
2886 {
2887#ifdef VBOX_STRICT
2888 /*
2889 * Check that the entry doesn't already exists.
2890 */
2891 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2892 {
2893 uint16_t i = pPage->iUserHead;
2894 do
2895 {
2896 Assert(i < pPool->cMaxUsers);
2897 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2898 i = paUsers[i].iNext;
2899 } while (i != NIL_PGMPOOL_USER_INDEX);
2900 }
2901#endif
2902
2903 /*
2904 * Find free a user node.
2905 */
2906 uint16_t i = pPool->iUserFreeHead;
2907 if (i == NIL_PGMPOOL_USER_INDEX)
2908 {
2909 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2910 if (RT_FAILURE(rc))
2911 return rc;
2912 i = pPool->iUserFreeHead;
2913 }
2914
2915 /*
2916 * Unlink the user node from the free list,
2917 * initialize and insert it into the user list.
2918 */
2919 pPool->iUserFreeHead = paUsers[i].iNext;
2920 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2921 paUsers[i].iUser = iUser;
2922 paUsers[i].iUserTable = iUserTable;
2923 pPage->iUserHead = i;
2924 }
2925 else
2926 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2927
2928
2929 /*
2930 * Insert into cache and enable monitoring of the guest page if enabled.
2931 *
2932 * Until we implement caching of all levels, including the CR3 one, we'll
2933 * have to make sure we don't try monitor & cache any recursive reuse of
2934 * a monitored CR3 page. Because all windows versions are doing this we'll
2935 * have to be able to do combined access monitoring, CR3 + PT and
2936 * PD + PT (guest PAE).
2937 *
2938 * Update:
2939 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2940 */
2941 const bool fCanBeMonitored = true;
2942 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2943 if (fCanBeMonitored)
2944 {
2945 rc = pgmPoolMonitorInsert(pPool, pPage);
2946 AssertRC(rc);
2947 }
2948 return rc;
2949}
2950
2951
2952/**
2953 * Adds a user reference to a page.
2954 *
2955 * This will move the page to the head of the
2956 *
2957 * @returns VBox status code.
2958 * @retval VINF_SUCCESS if successfully added.
2959 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2960 * @param pPool The pool.
2961 * @param pPage The cached page.
2962 * @param iUser The user index.
2963 * @param iUserTable The user table.
2964 */
2965static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2966{
2967 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2968 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2969 Assert(iUser != NIL_PGMPOOL_IDX);
2970
2971# ifdef VBOX_STRICT
2972 /*
2973 * Check that the entry doesn't already exists. We only allow multiple
2974 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2975 */
2976 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2977 {
2978 uint16_t i = pPage->iUserHead;
2979 do
2980 {
2981 Assert(i < pPool->cMaxUsers);
2982 /** @todo this assertion looks odd... Shouldn't it be && here? */
2983 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2984 i = paUsers[i].iNext;
2985 } while (i != NIL_PGMPOOL_USER_INDEX);
2986 }
2987# endif
2988
2989 /*
2990 * Allocate a user node.
2991 */
2992 uint16_t i = pPool->iUserFreeHead;
2993 if (i == NIL_PGMPOOL_USER_INDEX)
2994 {
2995 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2996 if (RT_FAILURE(rc))
2997 return rc;
2998 i = pPool->iUserFreeHead;
2999 }
3000 pPool->iUserFreeHead = paUsers[i].iNext;
3001
3002 /*
3003 * Initialize the user node and insert it.
3004 */
3005 paUsers[i].iNext = pPage->iUserHead;
3006 paUsers[i].iUser = iUser;
3007 paUsers[i].iUserTable = iUserTable;
3008 pPage->iUserHead = i;
3009
3010# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3011 if (pPage->fDirty)
3012 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3013# endif
3014
3015 /*
3016 * Tell the cache to update its replacement stats for this page.
3017 */
3018 pgmPoolCacheUsed(pPool, pPage);
3019 return VINF_SUCCESS;
3020}
3021
3022
3023/**
3024 * Frees a user record associated with a page.
3025 *
3026 * This does not clear the entry in the user table, it simply replaces the
3027 * user record to the chain of free records.
3028 *
3029 * @param pPool The pool.
3030 * @param HCPhys The HC physical address of the shadow page.
3031 * @param iUser The shadow page pool index of the user table.
3032 * @param iUserTable The index into the user table (shadowed).
3033 *
3034 * @remarks Don't call this for root pages.
3035 */
3036static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3037{
3038 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3039 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3040 Assert(iUser != NIL_PGMPOOL_IDX);
3041
3042 /*
3043 * Unlink and free the specified user entry.
3044 */
3045
3046 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3047 uint16_t i = pPage->iUserHead;
3048 if ( i != NIL_PGMPOOL_USER_INDEX
3049 && paUsers[i].iUser == iUser
3050 && paUsers[i].iUserTable == iUserTable)
3051 {
3052 pPage->iUserHead = paUsers[i].iNext;
3053
3054 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3055 paUsers[i].iNext = pPool->iUserFreeHead;
3056 pPool->iUserFreeHead = i;
3057 return;
3058 }
3059
3060 /* General: Linear search. */
3061 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3062 while (i != NIL_PGMPOOL_USER_INDEX)
3063 {
3064 if ( paUsers[i].iUser == iUser
3065 && paUsers[i].iUserTable == iUserTable)
3066 {
3067 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3068 paUsers[iPrev].iNext = paUsers[i].iNext;
3069 else
3070 pPage->iUserHead = paUsers[i].iNext;
3071
3072 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3073 paUsers[i].iNext = pPool->iUserFreeHead;
3074 pPool->iUserFreeHead = i;
3075 return;
3076 }
3077 iPrev = i;
3078 i = paUsers[i].iNext;
3079 }
3080
3081 /* Fatal: didn't find it */
3082 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3083 iUser, iUserTable, pPage->GCPhys));
3084}
3085
3086
3087/**
3088 * Gets the entry size of a shadow table.
3089 *
3090 * @param enmKind The kind of page.
3091 *
3092 * @returns The size of the entry in bytes. That is, 4 or 8.
3093 * @returns If the kind is not for a table, an assertion is raised and 0 is
3094 * returned.
3095 */
3096DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3097{
3098 switch (enmKind)
3099 {
3100 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3101 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3102 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3103 case PGMPOOLKIND_32BIT_PD:
3104 case PGMPOOLKIND_32BIT_PD_PHYS:
3105 return 4;
3106
3107 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3108 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3109 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3110 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3111 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3112 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3113 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3114 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3115 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3116 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3117 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3118 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3119 case PGMPOOLKIND_64BIT_PML4:
3120 case PGMPOOLKIND_PAE_PDPT:
3121 case PGMPOOLKIND_ROOT_NESTED:
3122 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3123 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3124 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3125 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3126 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3127 case PGMPOOLKIND_PAE_PD_PHYS:
3128 case PGMPOOLKIND_PAE_PDPT_PHYS:
3129 return 8;
3130
3131 default:
3132 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3133 }
3134}
3135
3136
3137/**
3138 * Gets the entry size of a guest table.
3139 *
3140 * @param enmKind The kind of page.
3141 *
3142 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3143 * @returns If the kind is not for a table, an assertion is raised and 0 is
3144 * returned.
3145 */
3146DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3147{
3148 switch (enmKind)
3149 {
3150 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3151 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3152 case PGMPOOLKIND_32BIT_PD:
3153 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3154 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3155 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3156 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3157 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3158 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3159 return 4;
3160
3161 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3162 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3163 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3164 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3165 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3166 case PGMPOOLKIND_64BIT_PML4:
3167 case PGMPOOLKIND_PAE_PDPT:
3168 return 8;
3169
3170 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3171 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3172 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3173 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3174 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3175 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3176 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3177 case PGMPOOLKIND_ROOT_NESTED:
3178 case PGMPOOLKIND_PAE_PD_PHYS:
3179 case PGMPOOLKIND_PAE_PDPT_PHYS:
3180 case PGMPOOLKIND_32BIT_PD_PHYS:
3181 /** @todo can we return 0? (nobody is calling this...) */
3182 AssertFailed();
3183 return 0;
3184
3185 default:
3186 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3187 }
3188}
3189
3190
3191/**
3192 * Checks one shadow page table entry for a mapping of a physical page.
3193 *
3194 * @returns true / false indicating removal of all relevant PTEs
3195 *
3196 * @param pVM Pointer to the VM.
3197 * @param pPhysPage The guest page in question.
3198 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3199 * @param iShw The shadow page table.
3200 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3201 */
3202static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3203{
3204 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3205 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3206 bool fRet = false;
3207
3208 /*
3209 * Assert sanity.
3210 */
3211 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3212 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3213 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3214
3215 /*
3216 * Then, clear the actual mappings to the page in the shadow PT.
3217 */
3218 switch (pPage->enmKind)
3219 {
3220 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3221 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3222 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3223 {
3224 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3225 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3226 uint32_t u32AndMask = 0;
3227 uint32_t u32OrMask = 0;
3228
3229 if (!fFlushPTEs)
3230 {
3231 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3232 {
3233 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3234 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3235 u32OrMask = X86_PTE_RW;
3236 u32AndMask = UINT32_MAX;
3237 fRet = true;
3238 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3239 break;
3240
3241 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3242 u32OrMask = 0;
3243 u32AndMask = ~X86_PTE_RW;
3244 fRet = true;
3245 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3246 break;
3247 default:
3248 /* (shouldn't be here, will assert below) */
3249 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3250 break;
3251 }
3252 }
3253 else
3254 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3255
3256 /* Update the counter if we're removing references. */
3257 if (!u32AndMask)
3258 {
3259 Assert(pPage->cPresent);
3260 Assert(pPool->cPresent);
3261 pPage->cPresent--;
3262 pPool->cPresent--;
3263 }
3264
3265 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3266 {
3267 X86PTE Pte;
3268
3269 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3270 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3271 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3272 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3273
3274 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3275 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3276 return fRet;
3277 }
3278#ifdef LOG_ENABLED
3279 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3280 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3281 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3282 {
3283 Log(("i=%d cFound=%d\n", i, ++cFound));
3284 }
3285#endif
3286 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3287 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3288 break;
3289 }
3290
3291 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3292 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3293 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3294 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3295 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3296 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3297 {
3298 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3299 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3300 uint64_t u64OrMask = 0;
3301 uint64_t u64AndMask = 0;
3302
3303 if (!fFlushPTEs)
3304 {
3305 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3306 {
3307 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3308 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3309 u64OrMask = X86_PTE_RW;
3310 u64AndMask = UINT64_MAX;
3311 fRet = true;
3312 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3313 break;
3314
3315 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3316 u64OrMask = 0;
3317 u64AndMask = ~(uint64_t)X86_PTE_RW;
3318 fRet = true;
3319 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3320 break;
3321
3322 default:
3323 /* (shouldn't be here, will assert below) */
3324 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3325 break;
3326 }
3327 }
3328 else
3329 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3330
3331 /* Update the counter if we're removing references. */
3332 if (!u64AndMask)
3333 {
3334 Assert(pPage->cPresent);
3335 Assert(pPool->cPresent);
3336 pPage->cPresent--;
3337 pPool->cPresent--;
3338 }
3339
3340 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3341 {
3342 X86PTEPAE Pte;
3343
3344 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3345 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3346 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3347 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3348
3349 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3350 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3351 return fRet;
3352 }
3353#ifdef LOG_ENABLED
3354 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3355 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3356 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3357 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3358 Log(("i=%d cFound=%d\n", i, ++cFound));
3359#endif
3360 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3361 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3362 break;
3363 }
3364
3365#ifdef PGM_WITH_LARGE_PAGES
3366 /* Large page case only. */
3367 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3368 {
3369 Assert(pVM->pgm.s.fNestedPaging);
3370
3371 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3372 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3373
3374 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3375 {
3376 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3377 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3378 pPD->a[iPte].u = 0;
3379 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3380
3381 /* Update the counter as we're removing references. */
3382 Assert(pPage->cPresent);
3383 Assert(pPool->cPresent);
3384 pPage->cPresent--;
3385 pPool->cPresent--;
3386
3387 return fRet;
3388 }
3389# ifdef LOG_ENABLED
3390 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3391 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3392 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3393 Log(("i=%d cFound=%d\n", i, ++cFound));
3394# endif
3395 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3396 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3397 break;
3398 }
3399
3400 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3401 case PGMPOOLKIND_PAE_PD_PHYS:
3402 {
3403 Assert(pVM->pgm.s.fNestedPaging);
3404
3405 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3406 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3407
3408 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3409 {
3410 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3411 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3412 pPD->a[iPte].u = 0;
3413 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3414
3415 /* Update the counter as we're removing references. */
3416 Assert(pPage->cPresent);
3417 Assert(pPool->cPresent);
3418 pPage->cPresent--;
3419 pPool->cPresent--;
3420 return fRet;
3421 }
3422# ifdef LOG_ENABLED
3423 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3424 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3425 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3426 Log(("i=%d cFound=%d\n", i, ++cFound));
3427# endif
3428 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3429 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3430 break;
3431 }
3432#endif /* PGM_WITH_LARGE_PAGES */
3433
3434 default:
3435 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3436 }
3437
3438 /* not reached. */
3439#ifndef _MSC_VER
3440 return fRet;
3441#endif
3442}
3443
3444
3445/**
3446 * Scans one shadow page table for mappings of a physical page.
3447 *
3448 * @param pVM Pointer to the VM.
3449 * @param pPhysPage The guest page in question.
3450 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3451 * @param iShw The shadow page table.
3452 */
3453static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3454{
3455 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3456
3457 /* We should only come here with when there's only one reference to this physical page. */
3458 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3459
3460 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3461 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3462 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3463 if (!fKeptPTEs)
3464 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3465 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3466}
3467
3468
3469/**
3470 * Flushes a list of shadow page tables mapping the same physical page.
3471 *
3472 * @param pVM Pointer to the VM.
3473 * @param pPhysPage The guest page in question.
3474 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3475 * @param iPhysExt The physical cross reference extent list to flush.
3476 */
3477static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3478{
3479 PGM_LOCK_ASSERT_OWNER(pVM);
3480 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3481 bool fKeepList = false;
3482
3483 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3484 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3485
3486 const uint16_t iPhysExtStart = iPhysExt;
3487 PPGMPOOLPHYSEXT pPhysExt;
3488 do
3489 {
3490 Assert(iPhysExt < pPool->cMaxPhysExts);
3491 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3492 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3493 {
3494 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3495 {
3496 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3497 if (!fKeptPTEs)
3498 {
3499 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3500 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3501 }
3502 else
3503 fKeepList = true;
3504 }
3505 }
3506 /* next */
3507 iPhysExt = pPhysExt->iNext;
3508 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3509
3510 if (!fKeepList)
3511 {
3512 /* insert the list into the free list and clear the ram range entry. */
3513 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3514 pPool->iPhysExtFreeHead = iPhysExtStart;
3515 /* Invalidate the tracking data. */
3516 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3517 }
3518
3519 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3520}
3521
3522
3523/**
3524 * Flushes all shadow page table mappings of the given guest page.
3525 *
3526 * This is typically called when the host page backing the guest one has been
3527 * replaced or when the page protection was changed due to a guest access
3528 * caught by the monitoring.
3529 *
3530 * @returns VBox status code.
3531 * @retval VINF_SUCCESS if all references has been successfully cleared.
3532 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3533 * pool cleaning. FF and sync flags are set.
3534 *
3535 * @param pVM Pointer to the VM.
3536 * @param GCPhysPage GC physical address of the page in question
3537 * @param pPhysPage The guest page in question.
3538 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3539 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3540 * flushed, it is NOT touched if this isn't necessary.
3541 * The caller MUST initialized this to @a false.
3542 */
3543int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3544{
3545 PVMCPU pVCpu = VMMGetCpu(pVM);
3546 pgmLock(pVM);
3547 int rc = VINF_SUCCESS;
3548
3549#ifdef PGM_WITH_LARGE_PAGES
3550 /* Is this page part of a large page? */
3551 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3552 {
3553 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3554 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3555
3556 /* Fetch the large page base. */
3557 PPGMPAGE pLargePage;
3558 if (GCPhysBase != GCPhysPage)
3559 {
3560 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3561 AssertFatal(pLargePage);
3562 }
3563 else
3564 pLargePage = pPhysPage;
3565
3566 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3567
3568 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3569 {
3570 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3571 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3572 pVM->pgm.s.cLargePagesDisabled++;
3573
3574 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3575 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3576
3577 *pfFlushTLBs = true;
3578 pgmUnlock(pVM);
3579 return rc;
3580 }
3581 }
3582#else
3583 NOREF(GCPhysPage);
3584#endif /* PGM_WITH_LARGE_PAGES */
3585
3586 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3587 if (u16)
3588 {
3589 /*
3590 * The zero page is currently screwing up the tracking and we'll
3591 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3592 * is defined, zero pages won't normally be mapped. Some kind of solution
3593 * will be needed for this problem of course, but it will have to wait...
3594 */
3595 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3596 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3597 rc = VINF_PGM_GCPHYS_ALIASED;
3598 else
3599 {
3600# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3601 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3602 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3603 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3604# endif
3605
3606 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3607 {
3608 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3609 pgmPoolTrackFlushGCPhysPT(pVM,
3610 pPhysPage,
3611 fFlushPTEs,
3612 PGMPOOL_TD_GET_IDX(u16));
3613 }
3614 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3615 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3616 else
3617 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3618 *pfFlushTLBs = true;
3619
3620# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3621 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3622# endif
3623 }
3624 }
3625
3626 if (rc == VINF_PGM_GCPHYS_ALIASED)
3627 {
3628 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3629 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3630 rc = VINF_PGM_SYNC_CR3;
3631 }
3632 pgmUnlock(pVM);
3633 return rc;
3634}
3635
3636
3637/**
3638 * Scans all shadow page tables for mappings of a physical page.
3639 *
3640 * This may be slow, but it's most likely more efficient than cleaning
3641 * out the entire page pool / cache.
3642 *
3643 * @returns VBox status code.
3644 * @retval VINF_SUCCESS if all references has been successfully cleared.
3645 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3646 * a page pool cleaning.
3647 *
3648 * @param pVM Pointer to the VM.
3649 * @param pPhysPage The guest page in question.
3650 */
3651int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3652{
3653 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3654 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3655 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3656 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3657
3658 /*
3659 * There is a limit to what makes sense.
3660 */
3661 if ( pPool->cPresent > 1024
3662 && pVM->cCpus == 1)
3663 {
3664 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3665 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3666 return VINF_PGM_GCPHYS_ALIASED;
3667 }
3668
3669 /*
3670 * Iterate all the pages until we've encountered all that in use.
3671 * This is simple but not quite optimal solution.
3672 */
3673 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3674 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3675 unsigned cLeft = pPool->cUsedPages;
3676 unsigned iPage = pPool->cCurPages;
3677 while (--iPage >= PGMPOOL_IDX_FIRST)
3678 {
3679 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3680 if ( pPage->GCPhys != NIL_RTGCPHYS
3681 && pPage->cPresent)
3682 {
3683 switch (pPage->enmKind)
3684 {
3685 /*
3686 * We only care about shadow page tables.
3687 */
3688 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3689 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3690 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3691 {
3692 unsigned cPresent = pPage->cPresent;
3693 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3694 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3695 if (pPT->a[i].n.u1Present)
3696 {
3697 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3698 {
3699 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3700 pPT->a[i].u = 0;
3701
3702 /* Update the counter as we're removing references. */
3703 Assert(pPage->cPresent);
3704 Assert(pPool->cPresent);
3705 pPage->cPresent--;
3706 pPool->cPresent--;
3707 }
3708 if (!--cPresent)
3709 break;
3710 }
3711 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3712 break;
3713 }
3714
3715 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3716 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3717 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3718 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3719 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3720 {
3721 unsigned cPresent = pPage->cPresent;
3722 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3723 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3724 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3725 {
3726 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3727 {
3728 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3729 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3730
3731 /* Update the counter as we're removing references. */
3732 Assert(pPage->cPresent);
3733 Assert(pPool->cPresent);
3734 pPage->cPresent--;
3735 pPool->cPresent--;
3736 }
3737 if (!--cPresent)
3738 break;
3739 }
3740 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3741 break;
3742 }
3743#ifndef IN_RC
3744 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3745 {
3746 unsigned cPresent = pPage->cPresent;
3747 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3748 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3749 if (pPT->a[i].n.u1Present)
3750 {
3751 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3752 {
3753 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3754 pPT->a[i].u = 0;
3755
3756 /* Update the counter as we're removing references. */
3757 Assert(pPage->cPresent);
3758 Assert(pPool->cPresent);
3759 pPage->cPresent--;
3760 pPool->cPresent--;
3761 }
3762 if (!--cPresent)
3763 break;
3764 }
3765 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3766 break;
3767 }
3768#endif
3769 }
3770 if (!--cLeft)
3771 break;
3772 }
3773 }
3774
3775 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3776 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3777
3778 /*
3779 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3780 */
3781 if (pPool->cPresent > 1024)
3782 {
3783 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3784 return VINF_PGM_GCPHYS_ALIASED;
3785 }
3786
3787 return VINF_SUCCESS;
3788}
3789
3790
3791/**
3792 * Clears the user entry in a user table.
3793 *
3794 * This is used to remove all references to a page when flushing it.
3795 */
3796static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3797{
3798 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3799 Assert(pUser->iUser < pPool->cCurPages);
3800 uint32_t iUserTable = pUser->iUserTable;
3801
3802 /*
3803 * Map the user page. Ignore references made by fictitious pages.
3804 */
3805 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3806 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3807 union
3808 {
3809 uint64_t *pau64;
3810 uint32_t *pau32;
3811 } u;
3812 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3813 {
3814 Assert(!pUserPage->pvPageR3);
3815 return;
3816 }
3817 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3818
3819
3820 /* Safety precaution in case we change the paging for other modes too in the future. */
3821 Assert(!pgmPoolIsPageLocked(pPage));
3822
3823#ifdef VBOX_STRICT
3824 /*
3825 * Some sanity checks.
3826 */
3827 switch (pUserPage->enmKind)
3828 {
3829 case PGMPOOLKIND_32BIT_PD:
3830 case PGMPOOLKIND_32BIT_PD_PHYS:
3831 Assert(iUserTable < X86_PG_ENTRIES);
3832 break;
3833 case PGMPOOLKIND_PAE_PDPT:
3834 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3835 case PGMPOOLKIND_PAE_PDPT_PHYS:
3836 Assert(iUserTable < 4);
3837 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3838 break;
3839 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3840 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3841 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3842 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3843 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3844 case PGMPOOLKIND_PAE_PD_PHYS:
3845 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3846 break;
3847 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3848 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3849 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3850 break;
3851 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3852 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3853 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3854 break;
3855 case PGMPOOLKIND_64BIT_PML4:
3856 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3857 /* GCPhys >> PAGE_SHIFT is the index here */
3858 break;
3859 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3860 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3861 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3862 break;
3863
3864 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3865 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3866 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3867 break;
3868
3869 case PGMPOOLKIND_ROOT_NESTED:
3870 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3871 break;
3872
3873 default:
3874 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3875 break;
3876 }
3877#endif /* VBOX_STRICT */
3878
3879 /*
3880 * Clear the entry in the user page.
3881 */
3882 switch (pUserPage->enmKind)
3883 {
3884 /* 32-bit entries */
3885 case PGMPOOLKIND_32BIT_PD:
3886 case PGMPOOLKIND_32BIT_PD_PHYS:
3887 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3888 break;
3889
3890 /* 64-bit entries */
3891 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3892 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3893 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3894 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3895 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3896#ifdef IN_RC
3897 /*
3898 * In 32 bits PAE mode we *must* invalidate the TLB when changing a
3899 * PDPT entry; the CPU fetches them only during cr3 load, so any
3900 * non-present PDPT will continue to cause page faults.
3901 */
3902 ASMReloadCR3();
3903 /* no break */
3904#endif
3905 case PGMPOOLKIND_PAE_PD_PHYS:
3906 case PGMPOOLKIND_PAE_PDPT_PHYS:
3907 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3908 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3909 case PGMPOOLKIND_64BIT_PML4:
3910 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3911 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3912 case PGMPOOLKIND_PAE_PDPT:
3913 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3914 case PGMPOOLKIND_ROOT_NESTED:
3915 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3916 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3917 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3918 break;
3919
3920 default:
3921 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3922 }
3923 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3924}
3925
3926
3927/**
3928 * Clears all users of a page.
3929 */
3930static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3931{
3932 /*
3933 * Free all the user records.
3934 */
3935 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3936
3937 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3938 uint16_t i = pPage->iUserHead;
3939 while (i != NIL_PGMPOOL_USER_INDEX)
3940 {
3941 /* Clear enter in user table. */
3942 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3943
3944 /* Free it. */
3945 const uint16_t iNext = paUsers[i].iNext;
3946 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3947 paUsers[i].iNext = pPool->iUserFreeHead;
3948 pPool->iUserFreeHead = i;
3949
3950 /* Next. */
3951 i = iNext;
3952 }
3953 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3954}
3955
3956
3957/**
3958 * Allocates a new physical cross reference extent.
3959 *
3960 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3961 * @param pVM Pointer to the VM.
3962 * @param piPhysExt Where to store the phys ext index.
3963 */
3964PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3965{
3966 PGM_LOCK_ASSERT_OWNER(pVM);
3967 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3968 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3969 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3970 {
3971 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3972 return NULL;
3973 }
3974 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3975 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3976 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3977 *piPhysExt = iPhysExt;
3978 return pPhysExt;
3979}
3980
3981
3982/**
3983 * Frees a physical cross reference extent.
3984 *
3985 * @param pVM Pointer to the VM.
3986 * @param iPhysExt The extent to free.
3987 */
3988void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3989{
3990 PGM_LOCK_ASSERT_OWNER(pVM);
3991 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3992 Assert(iPhysExt < pPool->cMaxPhysExts);
3993 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3994 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3995 {
3996 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3997 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3998 }
3999 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4000 pPool->iPhysExtFreeHead = iPhysExt;
4001}
4002
4003
4004/**
4005 * Frees a physical cross reference extent.
4006 *
4007 * @param pVM Pointer to the VM.
4008 * @param iPhysExt The extent to free.
4009 */
4010void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
4011{
4012 PGM_LOCK_ASSERT_OWNER(pVM);
4013 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4014
4015 const uint16_t iPhysExtStart = iPhysExt;
4016 PPGMPOOLPHYSEXT pPhysExt;
4017 do
4018 {
4019 Assert(iPhysExt < pPool->cMaxPhysExts);
4020 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4021 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4022 {
4023 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4024 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4025 }
4026
4027 /* next */
4028 iPhysExt = pPhysExt->iNext;
4029 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4030
4031 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4032 pPool->iPhysExtFreeHead = iPhysExtStart;
4033}
4034
4035
4036/**
4037 * Insert a reference into a list of physical cross reference extents.
4038 *
4039 * @returns The new tracking data for PGMPAGE.
4040 *
4041 * @param pVM Pointer to the VM.
4042 * @param iPhysExt The physical extent index of the list head.
4043 * @param iShwPT The shadow page table index.
4044 * @param iPte Page table entry
4045 *
4046 */
4047static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4048{
4049 PGM_LOCK_ASSERT_OWNER(pVM);
4050 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4051 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4052
4053 /*
4054 * Special common cases.
4055 */
4056 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4057 {
4058 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4059 paPhysExts[iPhysExt].apte[1] = iPte;
4060 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4061 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4062 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4063 }
4064 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4065 {
4066 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4067 paPhysExts[iPhysExt].apte[2] = iPte;
4068 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4069 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4070 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4071 }
4072 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4073
4074 /*
4075 * General treatment.
4076 */
4077 const uint16_t iPhysExtStart = iPhysExt;
4078 unsigned cMax = 15;
4079 for (;;)
4080 {
4081 Assert(iPhysExt < pPool->cMaxPhysExts);
4082 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4083 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4084 {
4085 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4086 paPhysExts[iPhysExt].apte[i] = iPte;
4087 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4088 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4089 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4090 }
4091 if (!--cMax)
4092 {
4093 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4094 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4095 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4096 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4097 }
4098
4099 /* advance */
4100 iPhysExt = paPhysExts[iPhysExt].iNext;
4101 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4102 break;
4103 }
4104
4105 /*
4106 * Add another extent to the list.
4107 */
4108 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4109 if (!pNew)
4110 {
4111 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4112 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4113 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4114 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4115 }
4116 pNew->iNext = iPhysExtStart;
4117 pNew->aidx[0] = iShwPT;
4118 pNew->apte[0] = iPte;
4119 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4120 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4121}
4122
4123
4124/**
4125 * Add a reference to guest physical page where extents are in use.
4126 *
4127 * @returns The new tracking data for PGMPAGE.
4128 *
4129 * @param pVM Pointer to the VM.
4130 * @param pPhysPage Pointer to the aPages entry in the ram range.
4131 * @param u16 The ram range flags (top 16-bits).
4132 * @param iShwPT The shadow page table index.
4133 * @param iPte Page table entry
4134 */
4135uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4136{
4137 pgmLock(pVM);
4138 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4139 {
4140 /*
4141 * Convert to extent list.
4142 */
4143 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4144 uint16_t iPhysExt;
4145 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4146 if (pPhysExt)
4147 {
4148 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4149 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4150 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4151 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4152 pPhysExt->aidx[1] = iShwPT;
4153 pPhysExt->apte[1] = iPte;
4154 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4155 }
4156 else
4157 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4158 }
4159 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4160 {
4161 /*
4162 * Insert into the extent list.
4163 */
4164 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4165 }
4166 else
4167 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4168 pgmUnlock(pVM);
4169 return u16;
4170}
4171
4172
4173/**
4174 * Clear references to guest physical memory.
4175 *
4176 * @param pPool The pool.
4177 * @param pPage The page.
4178 * @param pPhysPage Pointer to the aPages entry in the ram range.
4179 * @param iPte Shadow PTE index
4180 */
4181void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4182{
4183 PVM pVM = pPool->CTX_SUFF(pVM);
4184 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4185 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4186
4187 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4188 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4189 {
4190 pgmLock(pVM);
4191
4192 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4193 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4194 do
4195 {
4196 Assert(iPhysExt < pPool->cMaxPhysExts);
4197
4198 /*
4199 * Look for the shadow page and check if it's all freed.
4200 */
4201 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4202 {
4203 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4204 && paPhysExts[iPhysExt].apte[i] == iPte)
4205 {
4206 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4207 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4208
4209 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4210 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4211 {
4212 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4213 pgmUnlock(pVM);
4214 return;
4215 }
4216
4217 /* we can free the node. */
4218 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4219 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4220 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4221 {
4222 /* lonely node */
4223 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4224 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4225 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4226 }
4227 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4228 {
4229 /* head */
4230 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4231 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4232 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4233 }
4234 else
4235 {
4236 /* in list */
4237 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4238 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4239 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4240 }
4241 iPhysExt = iPhysExtNext;
4242 pgmUnlock(pVM);
4243 return;
4244 }
4245 }
4246
4247 /* next */
4248 iPhysExtPrev = iPhysExt;
4249 iPhysExt = paPhysExts[iPhysExt].iNext;
4250 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4251
4252 pgmUnlock(pVM);
4253 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4254 }
4255 else /* nothing to do */
4256 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4257}
4258
4259/**
4260 * Clear references to guest physical memory.
4261 *
4262 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4263 * physical address is assumed to be correct, so the linear search can be
4264 * skipped and we can assert at an earlier point.
4265 *
4266 * @param pPool The pool.
4267 * @param pPage The page.
4268 * @param HCPhys The host physical address corresponding to the guest page.
4269 * @param GCPhys The guest physical address corresponding to HCPhys.
4270 * @param iPte Shadow PTE index
4271 */
4272static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4273{
4274 /*
4275 * Lookup the page and check if it checks out before derefing it.
4276 */
4277 PVM pVM = pPool->CTX_SUFF(pVM);
4278 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4279 if (pPhysPage)
4280 {
4281 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4282#ifdef LOG_ENABLED
4283 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4284 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4285#endif
4286 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4287 {
4288 Assert(pPage->cPresent);
4289 Assert(pPool->cPresent);
4290 pPage->cPresent--;
4291 pPool->cPresent--;
4292 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4293 return;
4294 }
4295
4296 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4297 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4298 }
4299 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4300}
4301
4302
4303/**
4304 * Clear references to guest physical memory.
4305 *
4306 * @param pPool The pool.
4307 * @param pPage The page.
4308 * @param HCPhys The host physical address corresponding to the guest page.
4309 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
4310 * @param iPte Shadow pte index
4311 */
4312void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4313{
4314 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4315
4316 /*
4317 * Try the hint first.
4318 */
4319 RTHCPHYS HCPhysHinted;
4320 PVM pVM = pPool->CTX_SUFF(pVM);
4321 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4322 if (pPhysPage)
4323 {
4324 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4325 Assert(HCPhysHinted);
4326 if (HCPhysHinted == HCPhys)
4327 {
4328 Assert(pPage->cPresent);
4329 Assert(pPool->cPresent);
4330 pPage->cPresent--;
4331 pPool->cPresent--;
4332 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4333 return;
4334 }
4335 }
4336 else
4337 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4338
4339 /*
4340 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4341 */
4342 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4343 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4344 while (pRam)
4345 {
4346 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4347 while (iPage-- > 0)
4348 {
4349 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4350 {
4351 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4352 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4353 Assert(pPage->cPresent);
4354 Assert(pPool->cPresent);
4355 pPage->cPresent--;
4356 pPool->cPresent--;
4357 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4358 return;
4359 }
4360 }
4361 pRam = pRam->CTX_SUFF(pNext);
4362 }
4363
4364 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4365}
4366
4367
4368/**
4369 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4370 *
4371 * @param pPool The pool.
4372 * @param pPage The page.
4373 * @param pShwPT The shadow page table (mapping of the page).
4374 * @param pGstPT The guest page table.
4375 */
4376DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4377{
4378 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4379 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4380 {
4381 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4382 if (pShwPT->a[i].n.u1Present)
4383 {
4384 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4385 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4386 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4387 if (!pPage->cPresent)
4388 break;
4389 }
4390 }
4391}
4392
4393
4394/**
4395 * Clear references to guest physical memory in a PAE / 32-bit page table.
4396 *
4397 * @param pPool The pool.
4398 * @param pPage The page.
4399 * @param pShwPT The shadow page table (mapping of the page).
4400 * @param pGstPT The guest page table (just a half one).
4401 */
4402DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4403{
4404 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4405 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4406 {
4407 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4408 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4409 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4410 {
4411 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4412 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4413 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4414 if (!pPage->cPresent)
4415 break;
4416 }
4417 }
4418}
4419
4420
4421/**
4422 * Clear references to guest physical memory in a PAE / PAE page table.
4423 *
4424 * @param pPool The pool.
4425 * @param pPage The page.
4426 * @param pShwPT The shadow page table (mapping of the page).
4427 * @param pGstPT The guest page table.
4428 */
4429DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4430{
4431 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4432 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4433 {
4434 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4435 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4436 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4437 {
4438 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4439 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4440 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4441 if (!pPage->cPresent)
4442 break;
4443 }
4444 }
4445}
4446
4447
4448/**
4449 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4450 *
4451 * @param pPool The pool.
4452 * @param pPage The page.
4453 * @param pShwPT The shadow page table (mapping of the page).
4454 */
4455DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4456{
4457 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4458 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4459 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4460 {
4461 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4462 if (pShwPT->a[i].n.u1Present)
4463 {
4464 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4465 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4466 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4467 if (!pPage->cPresent)
4468 break;
4469 }
4470 }
4471}
4472
4473
4474/**
4475 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4476 *
4477 * @param pPool The pool.
4478 * @param pPage The page.
4479 * @param pShwPT The shadow page table (mapping of the page).
4480 */
4481DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4482{
4483 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4484 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4485 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4486 {
4487 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4488 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4489 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4490 {
4491 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4492 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4493 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4494 if (!pPage->cPresent)
4495 break;
4496 }
4497 }
4498}
4499
4500
4501/**
4502 * Clear references to shadowed pages in an EPT page table.
4503 *
4504 * @param pPool The pool.
4505 * @param pPage The page.
4506 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4507 */
4508DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4509{
4510 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4511 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4512 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4513 {
4514 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4515 if (pShwPT->a[i].n.u1Present)
4516 {
4517 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4518 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4519 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4520 if (!pPage->cPresent)
4521 break;
4522 }
4523 }
4524}
4525
4526
4527/**
4528 * Clear references to shadowed pages in a 32 bits page directory.
4529 *
4530 * @param pPool The pool.
4531 * @param pPage The page.
4532 * @param pShwPD The shadow page directory (mapping of the page).
4533 */
4534DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4535{
4536 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4537 {
4538 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4539 if ( pShwPD->a[i].n.u1Present
4540 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4541 )
4542 {
4543 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4544 if (pSubPage)
4545 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4546 else
4547 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4548 }
4549 }
4550}
4551
4552
4553/**
4554 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4555 *
4556 * @param pPool The pool.
4557 * @param pPage The page.
4558 * @param pShwPD The shadow page directory (mapping of the page).
4559 */
4560DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4561{
4562 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4563 {
4564 if ( pShwPD->a[i].n.u1Present
4565 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4566 {
4567#ifdef PGM_WITH_LARGE_PAGES
4568 if (pShwPD->a[i].b.u1Size)
4569 {
4570 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4571 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4572 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4573 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4574 i);
4575 }
4576 else
4577#endif
4578 {
4579 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4580 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4581 if (pSubPage)
4582 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4583 else
4584 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4585 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4586 }
4587 }
4588 }
4589}
4590
4591
4592/**
4593 * Clear references to shadowed pages in a PAE page directory pointer table.
4594 *
4595 * @param pPool The pool.
4596 * @param pPage The page.
4597 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4598 */
4599DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4600{
4601 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4602 {
4603 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4604 if ( pShwPDPT->a[i].n.u1Present
4605 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4606 )
4607 {
4608 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4609 if (pSubPage)
4610 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4611 else
4612 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4613 }
4614 }
4615}
4616
4617
4618/**
4619 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4620 *
4621 * @param pPool The pool.
4622 * @param pPage The page.
4623 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4624 */
4625DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4626{
4627 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4628 {
4629 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4630 if (pShwPDPT->a[i].n.u1Present)
4631 {
4632 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4633 if (pSubPage)
4634 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4635 else
4636 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4637 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4638 }
4639 }
4640}
4641
4642
4643/**
4644 * Clear references to shadowed pages in a 64-bit level 4 page table.
4645 *
4646 * @param pPool The pool.
4647 * @param pPage The page.
4648 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4649 */
4650DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4651{
4652 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4653 {
4654 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4655 if (pShwPML4->a[i].n.u1Present)
4656 {
4657 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4658 if (pSubPage)
4659 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4660 else
4661 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4662 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4663 }
4664 }
4665}
4666
4667
4668/**
4669 * Clear references to shadowed pages in an EPT page directory.
4670 *
4671 * @param pPool The pool.
4672 * @param pPage The page.
4673 * @param pShwPD The shadow page directory (mapping of the page).
4674 */
4675DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4676{
4677 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4678 {
4679 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4680 if (pShwPD->a[i].n.u1Present)
4681 {
4682#ifdef PGM_WITH_LARGE_PAGES
4683 if (pShwPD->a[i].b.u1Size)
4684 {
4685 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4686 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4687 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4688 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4689 i);
4690 }
4691 else
4692#endif
4693 {
4694 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4695 if (pSubPage)
4696 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4697 else
4698 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4699 }
4700 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4701 }
4702 }
4703}
4704
4705
4706/**
4707 * Clear references to shadowed pages in an EPT page directory pointer table.
4708 *
4709 * @param pPool The pool.
4710 * @param pPage The page.
4711 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4712 */
4713DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4714{
4715 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4716 {
4717 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4718 if (pShwPDPT->a[i].n.u1Present)
4719 {
4720 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4721 if (pSubPage)
4722 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4723 else
4724 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4725 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4726 }
4727 }
4728}
4729
4730
4731/**
4732 * Clears all references made by this page.
4733 *
4734 * This includes other shadow pages and GC physical addresses.
4735 *
4736 * @param pPool The pool.
4737 * @param pPage The page.
4738 */
4739static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4740{
4741 /*
4742 * Map the shadow page and take action according to the page kind.
4743 */
4744 PVM pVM = pPool->CTX_SUFF(pVM);
4745 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4746 switch (pPage->enmKind)
4747 {
4748 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4749 {
4750 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4751 void *pvGst;
4752 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4753 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4754 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4755 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4756 break;
4757 }
4758
4759 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4760 {
4761 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4762 void *pvGst;
4763 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4764 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4765 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4766 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4767 break;
4768 }
4769
4770 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4771 {
4772 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4773 void *pvGst;
4774 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4775 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4776 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4777 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4778 break;
4779 }
4780
4781 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4782 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4783 {
4784 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4785 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4786 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4787 break;
4788 }
4789
4790 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4791 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4792 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4793 {
4794 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4795 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4796 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4797 break;
4798 }
4799
4800 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4801 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4802 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4803 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4804 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4805 case PGMPOOLKIND_PAE_PD_PHYS:
4806 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4807 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4808 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4809 break;
4810
4811 case PGMPOOLKIND_32BIT_PD_PHYS:
4812 case PGMPOOLKIND_32BIT_PD:
4813 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4814 break;
4815
4816 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4817 case PGMPOOLKIND_PAE_PDPT:
4818 case PGMPOOLKIND_PAE_PDPT_PHYS:
4819 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4820 break;
4821
4822 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4823 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4824 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4825 break;
4826
4827 case PGMPOOLKIND_64BIT_PML4:
4828 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4829 break;
4830
4831 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4832 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4833 break;
4834
4835 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4836 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4837 break;
4838
4839 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4840 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4841 break;
4842
4843 default:
4844 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4845 }
4846
4847 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
4848 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4849 ASMMemZeroPage(pvShw);
4850 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4851 pPage->fZeroed = true;
4852 Assert(!pPage->cPresent);
4853 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4854}
4855
4856
4857/**
4858 * Flushes a pool page.
4859 *
4860 * This moves the page to the free list after removing all user references to it.
4861 *
4862 * @returns VBox status code.
4863 * @retval VINF_SUCCESS on success.
4864 * @param pPool The pool.
4865 * @param HCPhys The HC physical address of the shadow page.
4866 * @param fFlush Flush the TLBS when required (should only be false in very specific use cases!!)
4867 */
4868int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4869{
4870 PVM pVM = pPool->CTX_SUFF(pVM);
4871 bool fFlushRequired = false;
4872
4873 int rc = VINF_SUCCESS;
4874 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4875 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4876 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4877
4878 /*
4879 * Reject any attempts at flushing any of the special root pages (shall
4880 * not happen).
4881 */
4882 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4883 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4884 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4885 VINF_SUCCESS);
4886
4887 pgmLock(pVM);
4888
4889 /*
4890 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4891 */
4892 if (pgmPoolIsPageLocked(pPage))
4893 {
4894 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4895 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4896 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4897 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4898 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4899 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4900 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4901 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4902 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4903 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4904 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4905 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4906 pgmUnlock(pVM);
4907 return VINF_SUCCESS;
4908 }
4909
4910#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4911 /* Start a subset so we won't run out of mapping space. */
4912 PVMCPU pVCpu = VMMGetCpu(pVM);
4913 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4914#endif
4915
4916 /*
4917 * Mark the page as being in need of an ASMMemZeroPage().
4918 */
4919 pPage->fZeroed = false;
4920
4921#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4922 if (pPage->fDirty)
4923 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4924#endif
4925
4926 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4927 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4928 fFlushRequired = true;
4929
4930 /*
4931 * Clear the page.
4932 */
4933 pgmPoolTrackClearPageUsers(pPool, pPage);
4934 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4935 pgmPoolTrackDeref(pPool, pPage);
4936 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4937
4938 /*
4939 * Flush it from the cache.
4940 */
4941 pgmPoolCacheFlushPage(pPool, pPage);
4942
4943#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4944 /* Heavy stuff done. */
4945 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4946#endif
4947
4948 /*
4949 * Deregistering the monitoring.
4950 */
4951 if (pPage->fMonitored)
4952 rc = pgmPoolMonitorFlush(pPool, pPage);
4953
4954 /*
4955 * Free the page.
4956 */
4957 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4958 pPage->iNext = pPool->iFreeHead;
4959 pPool->iFreeHead = pPage->idx;
4960 pPage->enmKind = PGMPOOLKIND_FREE;
4961 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4962 pPage->GCPhys = NIL_RTGCPHYS;
4963 pPage->fReusedFlushPending = false;
4964
4965 pPool->cUsedPages--;
4966
4967 /* Flush the TLBs of all VCPUs if required. */
4968 if ( fFlushRequired
4969 && fFlush)
4970 {
4971 PGM_INVL_ALL_VCPU_TLBS(pVM);
4972 }
4973
4974 pgmUnlock(pVM);
4975 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4976 return rc;
4977}
4978
4979
4980/**
4981 * Frees a usage of a pool page.
4982 *
4983 * The caller is responsible to updating the user table so that it no longer
4984 * references the shadow page.
4985 *
4986 * @param pPool The pool.
4987 * @param HCPhys The HC physical address of the shadow page.
4988 * @param iUser The shadow page pool index of the user table.
4989 * NIL_PGMPOOL_IDX for root pages.
4990 * @param iUserTable The index into the user table (shadowed). Ignored if
4991 * root page.
4992 */
4993void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4994{
4995 PVM pVM = pPool->CTX_SUFF(pVM);
4996
4997 STAM_PROFILE_START(&pPool->StatFree, a);
4998 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4999 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5000 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5001
5002 pgmLock(pVM);
5003 if (iUser != NIL_PGMPOOL_IDX)
5004 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5005 if (!pPage->fCached)
5006 pgmPoolFlushPage(pPool, pPage);
5007 pgmUnlock(pVM);
5008 STAM_PROFILE_STOP(&pPool->StatFree, a);
5009}
5010
5011
5012/**
5013 * Makes one or more free page free.
5014 *
5015 * @returns VBox status code.
5016 * @retval VINF_SUCCESS on success.
5017 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5018 *
5019 * @param pPool The pool.
5020 * @param enmKind Page table kind
5021 * @param iUser The user of the page.
5022 */
5023static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5024{
5025 PVM pVM = pPool->CTX_SUFF(pVM);
5026 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5027 NOREF(enmKind);
5028
5029 /*
5030 * If the pool isn't full grown yet, expand it.
5031 */
5032 if ( pPool->cCurPages < pPool->cMaxPages
5033#if defined(IN_RC)
5034 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
5035 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5036 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
5037#endif
5038 )
5039 {
5040 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5041#ifdef IN_RING3
5042 int rc = PGMR3PoolGrow(pVM);
5043#else
5044 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
5045#endif
5046 if (RT_FAILURE(rc))
5047 return rc;
5048 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5049 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5050 return VINF_SUCCESS;
5051 }
5052
5053 /*
5054 * Free one cached page.
5055 */
5056 return pgmPoolCacheFreeOne(pPool, iUser);
5057}
5058
5059
5060/**
5061 * Allocates a page from the pool.
5062 *
5063 * This page may actually be a cached page and not in need of any processing
5064 * on the callers part.
5065 *
5066 * @returns VBox status code.
5067 * @retval VINF_SUCCESS if a NEW page was allocated.
5068 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5069 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5070 *
5071 * @param pVM Pointer to the VM.
5072 * @param GCPhys The GC physical address of the page we're gonna shadow.
5073 * For 4MB and 2MB PD entries, it's the first address the
5074 * shadow PT is covering.
5075 * @param enmKind The kind of mapping.
5076 * @param enmAccess Access type for the mapping (only relevant for big pages)
5077 * @param fA20Enabled Whether the A20 gate is enabled or not.
5078 * @param iUser The shadow page pool index of the user table. Root
5079 * pages should pass NIL_PGMPOOL_IDX.
5080 * @param iUserTable The index into the user table (shadowed). Ignored for
5081 * root pages (iUser == NIL_PGMPOOL_IDX).
5082 * @param fLockPage Lock the page
5083 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5084 */
5085int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5086 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5087{
5088 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5089 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5090 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5091 *ppPage = NULL;
5092 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5093 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5094 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5095
5096 pgmLock(pVM);
5097
5098 if (pPool->fCacheEnabled)
5099 {
5100 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5101 if (RT_SUCCESS(rc2))
5102 {
5103 if (fLockPage)
5104 pgmPoolLockPage(pPool, *ppPage);
5105 pgmUnlock(pVM);
5106 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5107 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5108 return rc2;
5109 }
5110 }
5111
5112 /*
5113 * Allocate a new one.
5114 */
5115 int rc = VINF_SUCCESS;
5116 uint16_t iNew = pPool->iFreeHead;
5117 if (iNew == NIL_PGMPOOL_IDX)
5118 {
5119 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5120 if (RT_FAILURE(rc))
5121 {
5122 pgmUnlock(pVM);
5123 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5124 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5125 return rc;
5126 }
5127 iNew = pPool->iFreeHead;
5128 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5129 }
5130
5131 /* unlink the free head */
5132 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5133 pPool->iFreeHead = pPage->iNext;
5134 pPage->iNext = NIL_PGMPOOL_IDX;
5135
5136 /*
5137 * Initialize it.
5138 */
5139 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5140 pPage->enmKind = enmKind;
5141 pPage->enmAccess = enmAccess;
5142 pPage->GCPhys = GCPhys;
5143 pPage->fA20Enabled = fA20Enabled;
5144 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5145 pPage->fMonitored = false;
5146 pPage->fCached = false;
5147 pPage->fDirty = false;
5148 pPage->fReusedFlushPending = false;
5149 pPage->cModifications = 0;
5150 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5151 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5152 pPage->cPresent = 0;
5153 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5154 pPage->idxDirtyEntry = 0;
5155 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5156 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5157 pPage->cLastAccessHandler = 0;
5158 pPage->cLocked = 0;
5159# ifdef VBOX_STRICT
5160 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5161# endif
5162
5163 /*
5164 * Insert into the tracking and cache. If this fails, free the page.
5165 */
5166 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5167 if (RT_FAILURE(rc3))
5168 {
5169 pPool->cUsedPages--;
5170 pPage->enmKind = PGMPOOLKIND_FREE;
5171 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5172 pPage->GCPhys = NIL_RTGCPHYS;
5173 pPage->iNext = pPool->iFreeHead;
5174 pPool->iFreeHead = pPage->idx;
5175 pgmUnlock(pVM);
5176 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5177 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5178 return rc3;
5179 }
5180
5181 /*
5182 * Commit the allocation, clear the page and return.
5183 */
5184#ifdef VBOX_WITH_STATISTICS
5185 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5186 pPool->cUsedPagesHigh = pPool->cUsedPages;
5187#endif
5188
5189 if (!pPage->fZeroed)
5190 {
5191 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5192 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5193 ASMMemZeroPage(pv);
5194 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5195 }
5196
5197 *ppPage = pPage;
5198 if (fLockPage)
5199 pgmPoolLockPage(pPool, pPage);
5200 pgmUnlock(pVM);
5201 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5202 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5203 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5204 return rc;
5205}
5206
5207
5208/**
5209 * Frees a usage of a pool page.
5210 *
5211 * @param pVM Pointer to the VM.
5212 * @param HCPhys The HC physical address of the shadow page.
5213 * @param iUser The shadow page pool index of the user table.
5214 * NIL_PGMPOOL_IDX if root page.
5215 * @param iUserTable The index into the user table (shadowed). Ignored if
5216 * root page.
5217 */
5218void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5219{
5220 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5221 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5222 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5223}
5224
5225
5226/**
5227 * Internal worker for finding a 'in-use' shadow page give by it's physical address.
5228 *
5229 * @returns Pointer to the shadow page structure.
5230 * @param pPool The pool.
5231 * @param HCPhys The HC physical address of the shadow page.
5232 */
5233PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5234{
5235 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5236
5237 /*
5238 * Look up the page.
5239 */
5240 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5241
5242 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5243 return pPage;
5244}
5245
5246
5247/**
5248 * Internal worker for finding a page for debugging purposes, no assertions.
5249 *
5250 * @returns Pointer to the shadow page structure. NULL on if not found.
5251 * @param pPool The pool.
5252 * @param HCPhys The HC physical address of the shadow page.
5253 */
5254PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5255{
5256 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5257 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5258}
5259
5260#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5261
5262/**
5263 * Flush the specified page if present
5264 *
5265 * @param pVM Pointer to the VM.
5266 * @param GCPhys Guest physical address of the page to flush
5267 */
5268void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5269{
5270 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5271
5272 VM_ASSERT_EMT(pVM);
5273
5274 /*
5275 * Look up the GCPhys in the hash.
5276 */
5277 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5278 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5279 if (i == NIL_PGMPOOL_IDX)
5280 return;
5281
5282 do
5283 {
5284 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5285 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5286 {
5287 switch (pPage->enmKind)
5288 {
5289 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5290 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5291 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5292 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5293 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5294 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5295 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5296 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5297 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5298 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5299 case PGMPOOLKIND_64BIT_PML4:
5300 case PGMPOOLKIND_32BIT_PD:
5301 case PGMPOOLKIND_PAE_PDPT:
5302 {
5303 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5304#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5305 if (pPage->fDirty)
5306 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5307 else
5308#endif
5309 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5310 Assert(!pgmPoolIsPageLocked(pPage));
5311 pgmPoolMonitorChainFlush(pPool, pPage);
5312 return;
5313 }
5314
5315 /* ignore, no monitoring. */
5316 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5317 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5318 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5319 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5320 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5321 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5322 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5323 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5324 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5325 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5326 case PGMPOOLKIND_ROOT_NESTED:
5327 case PGMPOOLKIND_PAE_PD_PHYS:
5328 case PGMPOOLKIND_PAE_PDPT_PHYS:
5329 case PGMPOOLKIND_32BIT_PD_PHYS:
5330 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5331 break;
5332
5333 default:
5334 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5335 }
5336 }
5337
5338 /* next */
5339 i = pPage->iNext;
5340 } while (i != NIL_PGMPOOL_IDX);
5341 return;
5342}
5343
5344#endif /* IN_RING3 */
5345#ifdef IN_RING3
5346
5347/**
5348 * Reset CPU on hot plugging.
5349 *
5350 * @param pVM Pointer to the VM.
5351 * @param pVCpu The virtual CPU.
5352 */
5353void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5354{
5355 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5356
5357 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5358 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5359 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5360}
5361
5362
5363/**
5364 * Flushes the entire cache.
5365 *
5366 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5367 * this and execute this CR3 flush.
5368 *
5369 * @param pPool The pool.
5370 */
5371void pgmR3PoolReset(PVM pVM)
5372{
5373 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5374
5375 PGM_LOCK_ASSERT_OWNER(pVM);
5376 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5377 LogFlow(("pgmR3PoolReset:\n"));
5378
5379 /*
5380 * If there are no pages in the pool, there is nothing to do.
5381 */
5382 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5383 {
5384 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5385 return;
5386 }
5387
5388 /*
5389 * Exit the shadow mode since we're going to clear everything,
5390 * including the root page.
5391 */
5392 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5393 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5394
5395 /*
5396 * Nuke the free list and reinsert all pages into it.
5397 */
5398 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5399 {
5400 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5401
5402 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5403 if (pPage->fMonitored)
5404 pgmPoolMonitorFlush(pPool, pPage);
5405 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5406 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5407 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5408 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5409 pPage->cModifications = 0;
5410 pPage->GCPhys = NIL_RTGCPHYS;
5411 pPage->enmKind = PGMPOOLKIND_FREE;
5412 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5413 Assert(pPage->idx == i);
5414 pPage->iNext = i + 1;
5415 pPage->fA20Enabled = true;
5416 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5417 pPage->fSeenNonGlobal = false;
5418 pPage->fMonitored = false;
5419 pPage->fDirty = false;
5420 pPage->fCached = false;
5421 pPage->fReusedFlushPending = false;
5422 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5423 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5424 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5425 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5426 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5427 pPage->cLastAccessHandler = 0;
5428 pPage->cLocked = 0;
5429#ifdef VBOX_STRICT
5430 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5431#endif
5432 }
5433 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5434 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5435 pPool->cUsedPages = 0;
5436
5437 /*
5438 * Zap and reinitialize the user records.
5439 */
5440 pPool->cPresent = 0;
5441 pPool->iUserFreeHead = 0;
5442 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5443 const unsigned cMaxUsers = pPool->cMaxUsers;
5444 for (unsigned i = 0; i < cMaxUsers; i++)
5445 {
5446 paUsers[i].iNext = i + 1;
5447 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5448 paUsers[i].iUserTable = 0xfffffffe;
5449 }
5450 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5451
5452 /*
5453 * Clear all the GCPhys links and rebuild the phys ext free list.
5454 */
5455 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5456 pRam;
5457 pRam = pRam->CTX_SUFF(pNext))
5458 {
5459 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5460 while (iPage-- > 0)
5461 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5462 }
5463
5464 pPool->iPhysExtFreeHead = 0;
5465 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5466 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5467 for (unsigned i = 0; i < cMaxPhysExts; i++)
5468 {
5469 paPhysExts[i].iNext = i + 1;
5470 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5471 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5472 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5473 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5474 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5475 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5476 }
5477 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5478
5479 /*
5480 * Just zap the modified list.
5481 */
5482 pPool->cModifiedPages = 0;
5483 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5484
5485 /*
5486 * Clear the GCPhys hash and the age list.
5487 */
5488 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5489 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5490 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5491 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5492
5493#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5494 /* Clear all dirty pages. */
5495 pPool->idxFreeDirtyPage = 0;
5496 pPool->cDirtyPages = 0;
5497 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5498 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5499#endif
5500
5501 /*
5502 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5503 */
5504 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5505 {
5506 /*
5507 * Re-enter the shadowing mode and assert Sync CR3 FF.
5508 */
5509 PVMCPU pVCpu = &pVM->aCpus[i];
5510 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5511 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5512 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5513 }
5514
5515 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5516}
5517
5518#endif /* IN_RING3 */
5519
5520#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5521/**
5522 * Stringifies a PGMPOOLKIND value.
5523 */
5524static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5525{
5526 switch ((PGMPOOLKIND)enmKind)
5527 {
5528 case PGMPOOLKIND_INVALID:
5529 return "PGMPOOLKIND_INVALID";
5530 case PGMPOOLKIND_FREE:
5531 return "PGMPOOLKIND_FREE";
5532 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5533 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5534 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5535 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5536 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5537 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5538 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5539 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5540 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5541 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5542 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5543 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5544 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5545 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5546 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5547 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5548 case PGMPOOLKIND_32BIT_PD:
5549 return "PGMPOOLKIND_32BIT_PD";
5550 case PGMPOOLKIND_32BIT_PD_PHYS:
5551 return "PGMPOOLKIND_32BIT_PD_PHYS";
5552 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5553 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5554 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5555 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5556 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5557 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5558 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5559 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5560 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5561 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5562 case PGMPOOLKIND_PAE_PD_PHYS:
5563 return "PGMPOOLKIND_PAE_PD_PHYS";
5564 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5565 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5566 case PGMPOOLKIND_PAE_PDPT:
5567 return "PGMPOOLKIND_PAE_PDPT";
5568 case PGMPOOLKIND_PAE_PDPT_PHYS:
5569 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5570 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5571 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5572 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5573 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5574 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5575 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5576 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5577 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5578 case PGMPOOLKIND_64BIT_PML4:
5579 return "PGMPOOLKIND_64BIT_PML4";
5580 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5581 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5582 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5583 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5584 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5585 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5586 case PGMPOOLKIND_ROOT_NESTED:
5587 return "PGMPOOLKIND_ROOT_NESTED";
5588 }
5589 return "Unknown kind!";
5590}
5591#endif /* LOG_ENABLED || VBOX_STRICT */
5592
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette