VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@56225

Last change on this file since 56225 was 56044, checked in by vboxsync, 10 years ago

PGMAllPool.cpp: doc update

1/* $Id: PGMAllPool.cpp 56044 2015-05-22 21:05:00Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hm_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
54static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
55#endif
56#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
57static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
58#endif
59
60int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
61PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
62void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
63void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
64
65RT_C_DECLS_END
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
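
/*
 * Illustrative note (a hedged sketch, not part of the original source): the
 * "big page" kinds above are the shadow page tables PGM synthesizes when the
 * guest maps a 4MB (non-PAE, PSE) or 2MB (PAE) page.  A caller might use the
 * predicate roughly like this:
 *
 *     if (pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind))
 *         // the shadow PT covers one large guest mapping, so there is no
 *         // 1:1 guest PTE to consult for each shadow entry.
 *
 * The snippet only shows the intended use of the predicate.
 */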
86
87
88/**
89 * Flushes a chain of pages sharing the same access monitor.
90 *
91 * @returns VBox status code suitable for scheduling.
92 * @param pPool The pool.
93 * @param pPage A page in the chain.
94 * @todo VBOXSTRICTRC
95 */
96int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
97{
98 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
99
100 /*
101 * Find the list head.
102 */
103 uint16_t idx = pPage->idx;
104 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
105 {
106 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 idx = pPage->iMonitoredPrev;
109 Assert(idx != pPage->idx);
110 pPage = &pPool->aPages[idx];
111 }
112 }
113
114 /*
115 * Iterate the list flushing each shadow page.
116 */
117 int rc = VINF_SUCCESS;
118 for (;;)
119 {
120 idx = pPage->iMonitoredNext;
121 Assert(idx != pPage->idx);
122 if (pPage->idx >= PGMPOOL_IDX_FIRST)
123 {
124 int rc2 = pgmPoolFlushPage(pPool, pPage);
125 AssertRC(rc2);
126 }
127 /* next */
128 if (idx == NIL_PGMPOOL_IDX)
129 break;
130 pPage = &pPool->aPages[idx];
131 }
132 return rc;
133}
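
/*
 * Hedged explanatory note (not part of the original source): monitored pool
 * pages that shadow the same guest physical page are kept on a doubly linked
 * list via iMonitoredPrev/iMonitoredNext, terminated by NIL_PGMPOOL_IDX.  The
 * function above first rewinds to the list head and then walks forward,
 * flushing every page whose index is at or above PGMPOOL_IDX_FIRST
 * (presumably the fixed root pages below that index are skipped).
 */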
134
135
136/**
137 * Wrapper for getting the current context pointer to the entry being modified.
138 *
139 * @returns VBox status code suitable for scheduling.
140 * @param pVM Pointer to the VM.
141 * @param pvDst Destination address
142 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
143 * on the context (e.g. \#PF in R0 & RC).
144 * @param GCPhysSrc The source guest physical address.
145 * @param cb Size of data to read
146 */
147DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
148{
149#if defined(IN_RING3)
150 NOREF(pVM); NOREF(GCPhysSrc);
151 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
152 return VINF_SUCCESS;
153#else
154 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
155 NOREF(pvSrc);
156 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
157#endif
158}
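
/*
 * Illustrative usage sketch (an assumption, not from the original source):
 * the monitor code below typically reads the guest entry that is being
 * overwritten, e.g.
 *
 *     X86PTE GstPte;
 *     int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
 *
 * Both the ring-3 memcpy path and the PGMPhysSimpleReadGCPhys path mask the
 * source address down to a sizeof(GstPte) boundary, so the caller does not
 * need to align pvAddress/GCPhysFault itself.
 */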
159
160
161/**
162 * Process shadow entries before they are changed by the guest.
163 *
164 * For PT entries we will clear them. For PD entries, we'll simply check
165 * for mapping conflicts and set the SyncCR3 FF if found.
166 *
167 * @param pVCpu Pointer to the VMCPU.
168 * @param pPool The pool.
169 * @param pPage The head page.
170 * @param GCPhysFault The guest physical fault address.
171 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
172 * depending on the context (e.g. \#PF in R0 & RC).
173 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
174 */
175static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
176 void const *pvAddress, unsigned cbWrite)
177{
178 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
179 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
180 PVM pVM = pPool->CTX_SUFF(pVM);
181 NOREF(pVCpu);
182
183 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
184
185 for (;;)
186 {
187 union
188 {
189 void *pv;
190 PX86PT pPT;
191 PPGMSHWPTPAE pPTPae;
192 PX86PD pPD;
193 PX86PDPAE pPDPae;
194 PX86PDPT pPDPT;
195 PX86PML4 pPML4;
196 } uShw;
197
198 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%#x\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
199
200 uShw.pv = NULL;
201 switch (pPage->enmKind)
202 {
203 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
204 {
205 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
206 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
207 const unsigned iShw = off / sizeof(X86PTE);
208 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
209 if (uShw.pPT->a[iShw].n.u1Present)
210 {
211 X86PTE GstPte;
212
213 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
214 AssertRC(rc);
215 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
216 pgmPoolTracDerefGCPhysHint(pPool, pPage,
217 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
218 GstPte.u & X86_PTE_PG_MASK,
219 iShw);
220 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
221 }
222 break;
223 }
224
225 /* page/2 sized */
226 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
227 {
228 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
229 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
230 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
231 {
232 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
233 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
234 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
235 {
236 X86PTE GstPte;
237 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
238 AssertRC(rc);
239
240 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
241 pgmPoolTracDerefGCPhysHint(pPool, pPage,
242 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
243 GstPte.u & X86_PTE_PG_MASK,
244 iShw);
245 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
246 }
247 }
248 break;
249 }
250
251 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
252 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
253 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
254 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
255 {
256 unsigned iGst = off / sizeof(X86PDE);
257 unsigned iShwPdpt = iGst / 256;
258 unsigned iShw = (iGst % 256) * 2;
259 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
260
261 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
262 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
263 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
264 {
265 for (unsigned i = 0; i < 2; i++)
266 {
267# ifdef VBOX_WITH_RAW_MODE_NOT_R0
268 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
269 {
270 Assert(pgmMapAreMappingsEnabled(pVM));
271 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
272 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
273 break;
274 }
275# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
276 if (uShw.pPDPae->a[iShw+i].n.u1Present)
277 {
278 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
279 pgmPoolFree(pVM,
280 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
281 pPage->idx,
282 iShw + i);
283 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
284 }
285
286 /* paranoia / a bit assumptive. */
287 if ( (off & 3)
288 && (off & 3) + cbWrite > 4)
289 {
290 const unsigned iShw2 = iShw + 2 + i;
291 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
292 {
293# ifdef VBOX_WITH_RAW_MODE_NOT_R0
294 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
295 {
296 Assert(pgmMapAreMappingsEnabled(pVM));
297 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
298 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
299 break;
300 }
301# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
302 if (uShw.pPDPae->a[iShw2].n.u1Present)
303 {
304 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
305 pgmPoolFree(pVM,
306 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
307 pPage->idx,
308 iShw2);
309 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
310 }
311 }
312 }
313 }
314 }
315 break;
316 }
317
318 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
319 {
320 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
321 const unsigned iShw = off / sizeof(X86PTEPAE);
322 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
323 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
324 {
325 X86PTEPAE GstPte;
326 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
327 AssertRC(rc);
328
329 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
330 pgmPoolTracDerefGCPhysHint(pPool, pPage,
331 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
332 GstPte.u & X86_PTE_PAE_PG_MASK,
333 iShw);
334 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
335 }
336
337 /* paranoia / a bit assumptive. */
338 if ( (off & 7)
339 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
340 {
341 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
342 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
343
344 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
345 {
346 X86PTEPAE GstPte;
347 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
348 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
349 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
350 AssertRC(rc);
351 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
352 pgmPoolTracDerefGCPhysHint(pPool, pPage,
353 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
354 GstPte.u & X86_PTE_PAE_PG_MASK,
355 iShw2);
356 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
357 }
358 }
359 break;
360 }
361
362 case PGMPOOLKIND_32BIT_PD:
363 {
364 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
365 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
366
367 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
368 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
369# ifdef VBOX_WITH_RAW_MODE_NOT_R0
370 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
371 {
372 Assert(pgmMapAreMappingsEnabled(pVM));
373 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
374 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
375 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
376 break;
377 }
378 else
379# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
380 {
381 if (uShw.pPD->a[iShw].n.u1Present)
382 {
383 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
384 pgmPoolFree(pVM,
385 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
386 pPage->idx,
387 iShw);
388 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
389 }
390 }
391 /* paranoia / a bit assumptive. */
392 if ( (off & 3)
393 && (off & 3) + cbWrite > sizeof(X86PTE))
394 {
395 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
396 if ( iShw2 != iShw
397 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
398 {
399# ifdef VBOX_WITH_RAW_MODE_NOT_R0
400 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
401 {
402 Assert(pgmMapAreMappingsEnabled(pVM));
403 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
404 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
405 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
406 break;
407 }
408# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
409 if (uShw.pPD->a[iShw2].n.u1Present)
410 {
411 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
412 pgmPoolFree(pVM,
413 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
414 pPage->idx,
415 iShw2);
416 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
417 }
418 }
419 }
420#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
421 if ( uShw.pPD->a[iShw].n.u1Present
422 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
423 {
424 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
425# ifdef IN_RC /* TLB load - we're pushing things a bit... */
426 ASMProbeReadByte(pvAddress);
427# endif
428 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
429 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
430 }
431#endif
432 break;
433 }
434
435 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
436 {
437 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
438 const unsigned iShw = off / sizeof(X86PDEPAE);
439 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
440#ifdef VBOX_WITH_RAW_MODE_NOT_R0
441 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
442 {
443 Assert(pgmMapAreMappingsEnabled(pVM));
444 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
445 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
446 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
447 break;
448 }
449#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
450 /*
451 * Causes trouble when the guest uses a PDE to refer to the whole page table level
452 * structure. (Invalidate here; faults later on when it tries to change the page
453 * table entries -> recheck; probably only applies to the RC case.)
454 */
455#ifdef VBOX_WITH_RAW_MODE_NOT_R0
456 else
457#endif
458 {
459 if (uShw.pPDPae->a[iShw].n.u1Present)
460 {
461 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
462 pgmPoolFree(pVM,
463 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
464 pPage->idx,
465 iShw);
466 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
467 }
468 }
469 /* paranoia / a bit assumptive. */
470 if ( (off & 7)
471 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
472 {
473 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
474 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
475
476#ifdef VBOX_WITH_RAW_MODE_NOT_R0
477 if ( iShw2 != iShw
478 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
479 {
480 Assert(pgmMapAreMappingsEnabled(pVM));
481 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
482 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
483 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
484 break;
485 }
486 else
487#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
488 if (uShw.pPDPae->a[iShw2].n.u1Present)
489 {
490 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
491 pgmPoolFree(pVM,
492 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
493 pPage->idx,
494 iShw2);
495 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
496 }
497 }
498 break;
499 }
500
501 case PGMPOOLKIND_PAE_PDPT:
502 {
503 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
504 /*
505 * Hopefully this doesn't happen very often:
506 * - touching unused parts of the page
507 * - messing with the bits of pd pointers without changing the physical address
508 */
509 /* PDPT roots are not page aligned; 32 byte only! */
510 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
511
512 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
513 const unsigned iShw = offPdpt / sizeof(X86PDPE);
514 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
515 {
516# ifdef VBOX_WITH_RAW_MODE_NOT_R0
517 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
518 {
519 Assert(pgmMapAreMappingsEnabled(pVM));
520 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
521 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
522 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
523 break;
524 }
525 else
526# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
527 if (uShw.pPDPT->a[iShw].n.u1Present)
528 {
529 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
530 pgmPoolFree(pVM,
531 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
532 pPage->idx,
533 iShw);
534 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
535 }
536
537 /* paranoia / a bit assumptive. */
538 if ( (offPdpt & 7)
539 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
540 {
541 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
542 if ( iShw2 != iShw
543 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
544 {
545# ifdef VBOX_WITH_RAW_MODE_NOT_R0
546 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
547 {
548 Assert(pgmMapAreMappingsEnabled(pVM));
549 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
550 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
551 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
552 break;
553 }
554 else
555# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
556 if (uShw.pPDPT->a[iShw2].n.u1Present)
557 {
558 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
559 pgmPoolFree(pVM,
560 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
561 pPage->idx,
562 iShw2);
563 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
564 }
565 }
566 }
567 }
568 break;
569 }
570
571#ifndef IN_RC
572 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
573 {
574 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
575 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
576 const unsigned iShw = off / sizeof(X86PDEPAE);
577 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
578 if (uShw.pPDPae->a[iShw].n.u1Present)
579 {
580 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
581 pgmPoolFree(pVM,
582 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
583 pPage->idx,
584 iShw);
585 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
586 }
587 /* paranoia / a bit assumptive. */
588 if ( (off & 7)
589 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
590 {
591 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
592 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
593
594 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
595 if (uShw.pPDPae->a[iShw2].n.u1Present)
596 {
597 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
598 pgmPoolFree(pVM,
599 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
600 pPage->idx,
601 iShw2);
602 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
603 }
604 }
605 break;
606 }
607
608 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
609 {
610 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
611 /*
612 * Hopefully this doesn't happen very often:
613 * - messing with the bits of pd pointers without changing the physical address
614 */
615 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
616 const unsigned iShw = off / sizeof(X86PDPE);
617 if (uShw.pPDPT->a[iShw].n.u1Present)
618 {
619 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
620 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
621 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
622 }
623 /* paranoia / a bit assumptive. */
624 if ( (off & 7)
625 && (off & 7) + cbWrite > sizeof(X86PDPE))
626 {
627 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
628 if (uShw.pPDPT->a[iShw2].n.u1Present)
629 {
630 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
631 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
632 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
633 }
634 }
635 break;
636 }
637
638 case PGMPOOLKIND_64BIT_PML4:
639 {
640 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
641 /*
642 * Hopefully this doesn't happen very often:
643 * - messing with the bits of pd pointers without changing the physical address
644 */
645 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
646 const unsigned iShw = off / sizeof(X86PDPE);
647 if (uShw.pPML4->a[iShw].n.u1Present)
648 {
649 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
650 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
651 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
652 }
653 /* paranoia / a bit assumptive. */
654 if ( (off & 7)
655 && (off & 7) + cbWrite > sizeof(X86PDPE))
656 {
657 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
658 if (uShw.pPML4->a[iShw2].n.u1Present)
659 {
660 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
661 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
662 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
663 }
664 }
665 break;
666 }
667#endif /* !IN_RC */
668
669 default:
670 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
671 }
672 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
673
674 /* next */
675 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
676 return;
677 pPage = &pPool->aPages[pPage->iMonitoredNext];
678 }
679}
680
681# ifndef IN_RING3
682
683/**
684 * Checks if an access could be a fork operation in progress.
685 *
686 * Meaning that the guest is setting up the parent process for Copy-On-Write.
687 *
688 * @returns true if it's likely that we're forking, otherwise false.
689 * @param pPool The pool.
690 * @param pDis The disassembled instruction.
691 * @param offFault The access offset.
692 */
693DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
694{
695 /*
696 * i386 linux is using btr to clear X86_PTE_RW.
697 * The functions involved are (2.6.16 source inspection):
698 * clear_bit
699 * ptep_set_wrprotect
700 * copy_one_pte
701 * copy_pte_range
702 * copy_pmd_range
703 * copy_pud_range
704 * copy_page_range
705 * dup_mmap
706 * dup_mm
707 * copy_mm
708 * copy_process
709 * do_fork
710 */
711 if ( pDis->pCurInstr->uOpcode == OP_BTR
712 && !(offFault & 4)
713 /** @todo Validate that the bit index is X86_PTE_RW. */
714 )
715 {
716 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
717 return true;
718 }
719 return false;
720}
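
/*
 * Hedged note (not part of the original source): on i386 Linux the
 * write-protect step of fork() boils down to something like
 *
 *     btr dword [ptep], 1        ; clear bit 1 = X86_PTE_RW
 *
 * so a BTR whose fault offset passes the !(offFault & 4) check above is taken
 * as a strong hint that the guest is merely write-protecting the entry rather
 * than repurposing the page (the bit-index validation is still a @todo).
 */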
721
722
723/**
724 * Determine whether the page is likely to have been reused.
725 *
726 * @returns true if we consider the page as being reused for a different purpose.
727 * @returns false if we consider it to still be a paging page.
728 * @param pVM Pointer to the VM.
729 * @param pVCpu Pointer to the VMCPU.
730 * @param pRegFrame Trap register frame.
731 * @param pDis The disassembly info for the faulting instruction.
732 * @param pvFault The fault address.
733 *
734 * @remark The REP prefix check is left to the caller because of STOSD/W.
735 */
736DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
737{
738#ifndef IN_RC
739 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
740 if ( HMHasPendingIrq(pVM)
741 && (pRegFrame->rsp - pvFault) < 32)
742 {
743 /* Fault caused by stack writes while trying to inject an interrupt event. */
744 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
745 return true;
746 }
747#else
748 NOREF(pVM); NOREF(pvFault);
749#endif
750
751 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
752
753 /* Non-supervisor mode write means it's used for something else. */
754 if (CPUMGetGuestCPL(pVCpu) == 3)
755 return true;
756
757 switch (pDis->pCurInstr->uOpcode)
758 {
759 /* call implies the actual push of the return address faulted */
760 case OP_CALL:
761 Log4(("pgmPoolMonitorIsReused: CALL\n"));
762 return true;
763 case OP_PUSH:
764 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
765 return true;
766 case OP_PUSHF:
767 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
768 return true;
769 case OP_PUSHA:
770 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
771 return true;
772 case OP_FXSAVE:
773 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
774 return true;
775 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
776 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
777 return true;
778 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
779 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
780 return true;
781 case OP_MOVSWD:
782 case OP_STOSWD:
783 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
784 && pRegFrame->rcx >= 0x40
785 )
786 {
787 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
788
789 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
790 return true;
791 }
792 return false;
793 }
794 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
795 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
796 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
797 {
798 Log4(("pgmPoolMonitorIsReused: ESP\n"));
799 return true;
800 }
801
802 return false;
803}
804
805
806/**
807 * Flushes the page being accessed.
808 *
809 * @returns VBox status code suitable for scheduling.
810 * @param pVM Pointer to the VM.
811 * @param pVCpu Pointer to the VMCPU.
812 * @param pPool The pool.
813 * @param pPage The pool page (head).
814 * @param pDis The disassembly of the write instruction.
815 * @param pRegFrame The trap register frame.
816 * @param GCPhysFault The fault address as guest physical address.
817 * @param pvFault The fault address.
818 * @todo VBOXSTRICTRC
819 */
820static int pgmPoolAccessPfHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
821 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
822{
823 NOREF(pVM); NOREF(GCPhysFault);
824
825 /*
826 * First, do the flushing.
827 */
828 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
829
830 /*
831 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
832 * Must do this in raw mode (!); XP boot will fail otherwise.
833 */
834 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
835 if (rc2 == VINF_SUCCESS)
836 { /* do nothing */ }
837#ifdef VBOX_WITH_IEM
838 else if (rc2 == VINF_EM_RESCHEDULE)
839 {
840 if (rc == VINF_SUCCESS)
841 rc = VBOXSTRICTRC_VAL(rc2);
842# ifndef IN_RING3
843 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
844# endif
845 }
846#endif
847 else if (rc2 == VERR_EM_INTERPRETER)
848 {
849#ifdef IN_RC
850 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
851 {
852 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
853 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->eip));
854 rc = VINF_SUCCESS;
855 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
856 }
857 else
858#endif
859 {
860 rc = VINF_EM_RAW_EMULATE_INSTR;
861 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
862 }
863 }
864 else if (RT_FAILURE_NP(rc2))
865 rc = VBOXSTRICTRC_VAL(rc2);
866 else
867 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
868
869 LogFlow(("pgmPoolAccessPfHandlerPT: returns %Rrc (flushed)\n", rc));
870 return rc;
871}
872
873
874/**
875 * Handles the STOSD write accesses.
876 *
877 * @returns VBox status code suitable for scheduling.
878 * @param pVM Pointer to the VM.
879 * @param pPool The pool.
880 * @param pPage The pool page (head).
881 * @param pDis The disassembly of the write instruction.
882 * @param pRegFrame The trap register frame.
883 * @param GCPhysFault The fault address as guest physical address.
884 * @param pvFault The fault address.
885 */
886DECLINLINE(int) pgmPoolAccessPfHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
887 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
888{
889 unsigned uIncrement = pDis->Param1.cb;
890 NOREF(pVM);
891
892 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
893 Assert(pRegFrame->rcx <= 0x20);
894
895#ifdef VBOX_STRICT
896 if (pDis->uOpMode == DISCPUMODE_32BIT)
897 Assert(uIncrement == 4);
898 else
899 Assert(uIncrement == 8);
900#endif
901
902 Log3(("pgmPoolAccessPfHandlerSTOSD\n"));
903
904 /*
905 * Increment the modification counter and insert it into the list
906 * of modified pages the first time.
907 */
908 if (!pPage->cModifications++)
909 pgmPoolMonitorModifiedInsert(pPool, pPage);
910
911 /*
912 * Execute REP STOSD.
913 *
914 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
915 * write situation, meaning that it's safe to write here.
916 */
917 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
918 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
919 while (pRegFrame->rcx)
920 {
921#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
922 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
923 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
924 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
925#else
926 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
927#endif
928#ifdef IN_RC
929 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
930#else
931 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
932#endif
933 pu32 += uIncrement;
934 GCPhysFault += uIncrement;
935 pRegFrame->rdi += uIncrement;
936 pRegFrame->rcx--;
937 }
938 pRegFrame->rip += pDis->cbInstr;
939
940 LogFlow(("pgmPoolAccessPfHandlerSTOSD: returns\n"));
941 return VINF_SUCCESS;
942}
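
/*
 * Hedged summary (not from the original source): the loop above emulates
 * REP STOS[D|Q] one element at a time, i.e. roughly
 *
 *     while (rcx--) { notify monitor chain; write eax/rax at GCPhysFault; advance rdi/GCPhysFault; }
 *     rip += cbInstr;
 *
 * The per-element pgmPoolMonitorChainChanging() call is what keeps the shadow
 * page table consistent before each guest-visible write lands.
 */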
943
944
945/**
946 * Handles the simple write accesses.
947 *
948 * @returns VBox status code suitable for scheduling.
949 * @param pVM Pointer to the VM.
950 * @param pVCpu Pointer to the VMCPU.
951 * @param pPool The pool.
952 * @param pPage The pool page (head).
953 * @param pDis The disassembly of the write instruction.
954 * @param pRegFrame The trap register frame.
955 * @param GCPhysFault The fault address as guest physical address.
956 * @param pvFault The fault address.
957 * @param pfReused Reused state (in/out)
958 */
959DECLINLINE(int) pgmPoolAccessPfHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
960 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
961{
962 Log3(("pgmPoolAccessPfHandlerSimple\n"));
963 NOREF(pVM);
964 NOREF(pfReused); /* initialized by caller */
965
966 /*
967 * Increment the modification counter and insert it into the list
968 * of modified pages the first time.
969 */
970 if (!pPage->cModifications++)
971 pgmPoolMonitorModifiedInsert(pPool, pPage);
972
973 /*
974 * Clear all the pages. ASSUMES that pvFault is readable.
975 */
976#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
977 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
978#endif
979
980 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
981 if (cbWrite <= 8)
982 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
983 else
984 {
985 Assert(cbWrite <= 16);
986 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
987 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
988 }
989
990#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
991 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
992#endif
993
994 /*
995 * Interpret the instruction.
996 */
997 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
998 if (RT_SUCCESS(rc))
999 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1000 else if (rc == VERR_EM_INTERPRETER)
1001 {
1002 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1003 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1004 rc = VINF_EM_RAW_EMULATE_INSTR;
1005 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1006 }
1007
1008#if 0 /* experimental code */
1009 if (rc == VINF_SUCCESS)
1010 {
1011 switch (pPage->enmKind)
1012 {
1013 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1014 {
1015 X86PTEPAE GstPte;
1016 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1017 AssertRC(rc);
1018
1019 /* Check the new value written by the guest. If present and with a bogus physical address, then
1020 * it's fairly safe to assume the guest is reusing the PT.
1021 */
1022 if (GstPte.n.u1Present)
1023 {
1024 RTHCPHYS HCPhys = -1;
1025 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1026 if (rc != VINF_SUCCESS)
1027 {
1028 *pfReused = true;
1029 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1030 }
1031 }
1032 break;
1033 }
1034 }
1035 }
1036#endif
1037
1038 LogFlow(("pgmPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1039 return VBOXSTRICTRC_VAL(rc);
1040}
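
/*
 * Hedged note (not part of the original source): writes larger than 8 bytes
 * (e.g. 16-byte SSE stores) are handed to the monitor chain as two calls,
 * 8 bytes plus the remainder, because a single shadow entry is at most 8
 * bytes wide and the helper only deals with crossing one entry boundary at
 * a time.
 */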
1041
1042
1043/**
1044 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1045 * \#PF access handler callback for page table pages.}
1046 *
1047 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
1048 */
1049DECLEXPORT(VBOXSTRICTRC) pgmPoolAccessPfHandler(PVM pVM, PVMCPU pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
1050 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1051{
1052 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1053 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1054 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1055 unsigned cMaxModifications;
1056 bool fForcedFlush = false;
1057 NOREF(uErrorCode);
1058
1059 LogFlow(("pgmPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1060
1061 pgmLock(pVM);
1062 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1063 {
1064 /* Pool page changed while we were waiting for the lock; ignore. */
1065 Log(("CPU%d: pgmPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1066 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1067 pgmUnlock(pVM);
1068 return VINF_SUCCESS;
1069 }
1070#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1071 if (pPage->fDirty)
1072 {
1073 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1074 pgmUnlock(pVM);
1075 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1076 }
1077#endif
1078
1079#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1080 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1081 {
1082 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1083 void *pvGst;
1084 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1085 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1086 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1087 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1088 }
1089#endif
1090
1091 /*
1092 * Disassemble the faulting instruction.
1093 */
1094 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1095 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1096 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1097 {
1098 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1099 pgmUnlock(pVM);
1100 return rc;
1101 }
1102
1103 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1104
1105 /*
1106 * We should ALWAYS have the list head as user parameter. This
1107 * is because we use that page to record the changes.
1108 */
1109 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1110
1111#ifdef IN_RING0
1112 /* Maximum nr of modifications depends on the page type. */
1113 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1114 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1115 cMaxModifications = 4;
1116 else
1117 cMaxModifications = 24;
1118#else
1119 cMaxModifications = 48;
1120#endif
1121
1122 /*
1123 * Incremental page table updates should weigh more than random ones.
1124 * (Only applies when started from offset 0)
1125 */
1126 pVCpu->pgm.s.cPoolAccessHandler++;
1127 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1128 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1129 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1130 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1131 {
1132 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1133 Assert(pPage->cModifications < 32000);
1134 pPage->cModifications = pPage->cModifications * 2;
1135 pPage->GCPtrLastAccessHandlerFault = pvFault;
1136 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1137 if (pPage->cModifications >= cMaxModifications)
1138 {
1139 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1140 fForcedFlush = true;
1141 }
1142 }
1143
1144 if (pPage->cModifications >= cMaxModifications)
1145 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1146
1147 /*
1148 * Check if it's worth dealing with.
1149 */
1150 bool fReused = false;
1151 bool fNotReusedNotForking = false;
1152 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1153 || pgmPoolIsPageLocked(pPage)
1154 )
1155 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1156 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1157 {
1158 /*
1159 * Simple instructions, no REP prefix.
1160 */
1161 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1162 {
1163 rc = pgmPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1164 if (fReused)
1165 goto flushPage;
1166
1167 /* A mov instruction to change the first page table entry will be remembered so we can detect
1168 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1169 */
1170 if ( rc == VINF_SUCCESS
1171 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1172 && pDis->pCurInstr->uOpcode == OP_MOV
1173 && (pvFault & PAGE_OFFSET_MASK) == 0)
1174 {
1175 pPage->GCPtrLastAccessHandlerFault = pvFault;
1176 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1177 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1178 /* Make sure we don't kick out a page too quickly. */
1179 if (pPage->cModifications > 8)
1180 pPage->cModifications = 2;
1181 }
1182 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1183 {
1184 /* ignore the 2nd write to this page table entry. */
1185 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1186 }
1187 else
1188 {
1189 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1190 pPage->GCPtrLastAccessHandlerRip = 0;
1191 }
1192
1193 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1194 pgmUnlock(pVM);
1195 return rc;
1196 }
1197
1198 /*
1199 * Windows is frequently doing small memset() operations (netio test 4k+).
1200 * We have to deal with these or we'll kill the cache and performance.
1201 */
1202 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1203 && !pRegFrame->eflags.Bits.u1DF
1204 && pDis->uOpMode == pDis->uCpuMode
1205 && pDis->uAddrMode == pDis->uCpuMode)
1206 {
1207 bool fValidStosd = false;
1208
1209 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1210 && pDis->fPrefix == DISPREFIX_REP
1211 && pRegFrame->ecx <= 0x20
1212 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1213 && !((uintptr_t)pvFault & 3)
1214 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1215 )
1216 {
1217 fValidStosd = true;
1218 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1219 }
1220 else
1221 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1222 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1223 && pRegFrame->rcx <= 0x20
1224 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1225 && !((uintptr_t)pvFault & 7)
1226 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1227 )
1228 {
1229 fValidStosd = true;
1230 }
1231
1232 if (fValidStosd)
1233 {
1234 rc = pgmPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1235 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1236 pgmUnlock(pVM);
1237 return rc;
1238 }
1239 }
1240
1241 /* REP prefix, don't bother. */
1242 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1243 Log4(("pgmPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1244 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1245 fNotReusedNotForking = true;
1246 }
1247
1248#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1249 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1250 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1251 */
1252 if ( pPage->cModifications >= cMaxModifications
1253 && !fForcedFlush
1254 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1255 && ( fNotReusedNotForking
1256 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1257 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1258 )
1259 )
1260 {
1261 Assert(!pgmPoolIsPageLocked(pPage));
1262 Assert(pPage->fDirty == false);
1263
1264 /* Flush any monitored duplicates as we will disable write protection. */
1265 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1266 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1267 {
1268 PPGMPOOLPAGE pPageHead = pPage;
1269
1270 /* Find the monitor head. */
1271 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1272 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1273
1274 while (pPageHead)
1275 {
1276 unsigned idxNext = pPageHead->iMonitoredNext;
1277
1278 if (pPageHead != pPage)
1279 {
1280 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1281 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1282 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1283 AssertRC(rc2);
1284 }
1285
1286 if (idxNext == NIL_PGMPOOL_IDX)
1287 break;
1288
1289 pPageHead = &pPool->aPages[idxNext];
1290 }
1291 }
1292
1293 /* The flushing above might fail for locked pages, so double check. */
1294 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1295 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1296 {
1297 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1298
1299 /* Temporarily allow write access to the page table again. */
1300 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1301 if (rc == VINF_SUCCESS)
1302 {
1303 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1304 AssertMsg(rc == VINF_SUCCESS
1305 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1306 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1307 || rc == VERR_PAGE_NOT_PRESENT,
1308 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1309# ifdef VBOX_STRICT
1310 pPage->GCPtrDirtyFault = pvFault;
1311# endif
1312
1313 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1314 pgmUnlock(pVM);
1315 return rc;
1316 }
1317 }
1318 }
1319#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1320
1321 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1322flushPage:
1323 /*
1324 * Not worth it, so flush it.
1325 *
1326 * If we considered it to be reused, don't go back to ring-3
1327 * to emulate failed instructions since we usually cannot
1328 * interpret them. This may be a bit risky, in which case
1329 * the reuse detection must be fixed.
1330 */
1331 rc = pgmPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1332 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1333 && fReused)
1334 {
1335 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1336 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1337 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1338 }
1339 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1340 pgmUnlock(pVM);
1341 return rc;
1342}
1343
1344# endif /* !IN_RING3 */
1345
1346/**
1347 * @callback_method_impl{FNPGMPHYSHANDLER,
1348 * Access handler for shadowed page table pages.}
1349 *
1350 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1351 */
1352PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1353pgmPoolAccessHandler(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1354 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1355{
1356 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1357 STAM_PROFILE_START(&pPool->StatMonitorR3, a);
1358 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1359 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1360 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1361
1362 NOREF(pvBuf); NOREF(enmAccessType);
1363
1364 /*
1365 * Make sure the pool page wasn't modified by a different CPU.
1366 */
1367 pgmLock(pVM);
1368 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1369 {
1370 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1371
1372 /* The max modification count before flushing depends on the context and page type. */
1373#ifdef IN_RING3
1374 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1375#else
1376 uint16_t cMaxModifications;
1377 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1378 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1379 cMaxModifications = 4;
1380 else
1381 cMaxModifications = 24;
1382# ifdef IN_RC
1383 cMaxModifications *= 2; /* traps are cheaper than exits. */
1384# endif
1385#endif
1386
1387 /*
1388 * We don't have to be very sophisticated about this since there are relatively few calls here.
1389 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1390 */
1391 if ( ( pPage->cModifications < cMaxModifications
1392 || pgmPoolIsPageLocked(pPage) )
1393 && enmOrigin != PGMACCESSORIGIN_DEVICE
1394 && cbBuf <= 16)
1395 {
1396 /* Clear the shadow entry. */
1397 if (!pPage->cModifications++)
1398 pgmPoolMonitorModifiedInsert(pPool, pPage);
1399
1400 if (cbBuf <= 8)
1401 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1402 else
1403 {
1404 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1405 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1406 }
1407 }
1408 else
1409 {
1410 /* ASSUME that VERR_PGM_POOL_CLEARED can be ignored here and that FFs will deal with it in due time. */
1411 pgmPoolMonitorChainFlush(pPool, pPage);
1412 }
1413
1414 STAM_PROFILE_STOP_EX(&pPool->StatMonitorR3, &pPool->StatMonitorR3FlushPage, a);
1415 }
1416 else
1417 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1418 pgmUnlock(pVM);
1419 return VINF_PGM_HANDLER_DO_DEFAULT;
1420}
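
/*
 * Hedged note (not from the original source): returning
 * VINF_PGM_HANDLER_DO_DEFAULT lets the caller perform the actual memory
 * access after the shadow entries have been cleared or the chain flushed
 * above; the handler itself never copies pvBuf into the guest page.
 */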
1421
1422
1423# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1424
1425# if defined(VBOX_STRICT) && !defined(IN_RING3)
1426
1427/**
1428 * Check references to guest physical memory in a PAE / PAE page table.
1429 *
1430 * @param pPool The pool.
1431 * @param pPage The page.
1432 * @param pShwPT The shadow page table (mapping of the page).
1433 * @param pGstPT The guest page table.
1434 */
1435static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1436{
1437 unsigned cErrors = 0;
1438 int LastRc = -1; /* initialized to shut up gcc */
1439 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1440 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1441 PVM pVM = pPool->CTX_SUFF(pVM);
1442
1443#ifdef VBOX_STRICT
1444 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1445 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1446#endif
1447 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1448 {
1449 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1450 {
1451 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1452 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1453 if ( rc != VINF_SUCCESS
1454 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1455 {
1456 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1457 LastPTE = i;
1458 LastRc = rc;
1459 LastHCPhys = HCPhys;
1460 cErrors++;
1461
1462 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1463 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1464 AssertRC(rc);
1465
1466 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1467 {
1468 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1469
1470 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1471 {
1472 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1473
1474 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1475 {
1476 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1477 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1478 {
1479 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1480 }
1481 }
1482
1483 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1484 }
1485 }
1486 }
1487 }
1488 }
1489 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1490}
1491
1492
1493/**
1494 * Check references to guest physical memory in a PAE / 32-bit page table.
1495 *
1496 * @param pPool The pool.
1497 * @param pPage The page.
1498 * @param pShwPT The shadow page table (mapping of the page).
1499 * @param pGstPT The guest page table.
1500 */
1501static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1502{
1503 unsigned cErrors = 0;
1504 int LastRc = -1; /* initialized to shut up gcc */
1505 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1506 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1507 PVM pVM = pPool->CTX_SUFF(pVM);
1508
1509#ifdef VBOX_STRICT
1510 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1511 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1512#endif
1513 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1514 {
1515 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1516 {
1517 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1518 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1519 if ( rc != VINF_SUCCESS
1520 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1521 {
1522 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1523 LastPTE = i;
1524 LastRc = rc;
1525 LastHCPhys = HCPhys;
1526 cErrors++;
1527
1528 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1529 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1530 AssertRC(rc);
1531
1532 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1533 {
1534 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1535
1536 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1537 {
1538 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1539
1540 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1541 {
1542 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1543 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1544 {
1545 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1546 }
1547 }
1548
1549 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1550 }
1551 }
1552 }
1553 }
1554 }
1555 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1556}
1557
1558# endif /* VBOX_STRICT && !IN_RING3 */
1559
1560/**
1561 * Clear references to guest physical memory in a PAE / PAE page table.
1562 *
1563 * @returns nr of changed PTEs
1564 * @param pPool The pool.
1565 * @param pPage The page.
1566 * @param pShwPT The shadow page table (mapping of the page).
1567 * @param pGstPT The guest page table.
1568 * @param pOldGstPT The old cached guest page table.
1569 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1570 * @param pfFlush Flush reused page table (out)
1571 */
1572DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1573 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1574{
1575 unsigned cChanged = 0;
1576
1577#ifdef VBOX_STRICT
1578 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1579 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1580#endif
1581 *pfFlush = false;
1582
1583 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1584 {
1585 /* Check the new value written by the guest. If present and with a bogus physical address, then
1586 * it's fairly safe to assume the guest is reusing the PT.
1587 */
1588 if ( fAllowRemoval
1589 && pGstPT->a[i].n.u1Present)
1590 {
1591 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1592 {
1593 *pfFlush = true;
1594 return ++cChanged;
1595 }
1596 }
1597 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1598 {
1599 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1600 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1601 {
1602#ifdef VBOX_STRICT
1603 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1604 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1605 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1606#endif
1607 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1608 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1609 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1610 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1611
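 /* The shadow entry may legitimately be more restrictive than the guest one (e.g. write
    access removed for write monitoring or dirty-bit tracking), so only a shadow entry
    that grants more than the guest counts as a change. */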
1612 if ( uHostAttr == uGuestAttr
1613 && fHostRW <= fGuestRW)
1614 continue;
1615 }
1616 cChanged++;
1617 /* Something was changed, so flush it. */
1618 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1619 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1620 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1621 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1622 }
1623 }
1624 return cChanged;
1625}
1626
1627
1628/**
1629 * Clear references to guest physical memory in a PAE / 32-bit page table.
1630 *
1631 * @returns The number of changed PTEs.
1632 * @param pPool The pool.
1633 * @param pPage The page.
1634 * @param pShwPT The shadow page table (mapping of the page).
1635 * @param pGstPT The guest page table.
1636 * @param pOldGstPT The old cached guest page table.
1637 * @param fAllowRemoval Allow removal of a reused page table; bail out as soon as an invalid PTE is encountered.
1638 * @param pfFlush Where to return whether the reused page table must be flushed (out).
1639 */
1640DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1641 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1642{
1643 unsigned cChanged = 0;
1644
1645#ifdef VBOX_STRICT
1646 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1647 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1648#endif
1649 *pfFlush = false;
1650
1651 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1652 {
1653 /* Check the new value written by the guest. If present and with a bogus physical address, then
1654 * it's fairly safe to assume the guest is reusing the PT.
1655 */
1656 if ( fAllowRemoval
1657 && pGstPT->a[i].n.u1Present)
1658 {
1659 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1660 {
1661 *pfFlush = true;
1662 return ++cChanged;
1663 }
1664 }
1665 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1666 {
1667 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1668 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1669 {
1670#ifdef VBOX_STRICT
1671 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1672 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1673 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1674#endif
1675 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1676 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1677 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1678 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1679
1680 if ( uHostAttr == uGuestAttr
1681 && fHostRW <= fGuestRW)
1682 continue;
1683 }
1684 cChanged++;
1685 /* Something was changed, so flush it. */
1686 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1687 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1688 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1689 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1690 }
1691 }
1692 return cChanged;
1693}
1694
1695
1696/**
1697 * Flush a dirty page
1698 *
1699 * @param pVM Pointer to the VM.
1700 * @param pPool The pool.
1701 * @param idxSlot Dirty array slot index
1702 * @param fAllowRemoval Allow a reused page table to be removed
1703 */
1704static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1705{
1706 PPGMPOOLPAGE pPage;
1707 unsigned idxPage;
1708
1709 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1710 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1711 return;
1712
1713 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1714 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1715 pPage = &pPool->aPages[idxPage];
1716 Assert(pPage->idx == idxPage);
1717 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1718
1719 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1720 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1721
1722#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1723 PVMCPU pVCpu = VMMGetCpu(pVM);
1724 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1725#endif
1726
1727 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1728 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1729 Assert(rc == VINF_SUCCESS);
1730 pPage->fDirty = false;
1731
1732#ifdef VBOX_STRICT
1733 uint64_t fFlags = 0;
1734 RTHCPHYS HCPhys;
1735 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1736 AssertMsg( ( rc == VINF_SUCCESS
1737 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1738 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1739 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1740 || rc == VERR_PAGE_NOT_PRESENT,
1741 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1742#endif
1743
1744 /* Flush those PTEs that have changed. */
1745 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1746 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1747 void *pvGst;
1748 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1749 bool fFlush;
1750 unsigned cChanges;
1751
1752 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1753 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1754 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1755 else
1756 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1757 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1758
1759 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1760 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1761 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1762 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1763
1764 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1765 Assert(pPage->cModifications);
1766 if (cChanges < 4)
1767 pPage->cModifications = 1; /* must use > 0 here */
1768 else
1769 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1770
1771 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1772 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1773 pPool->idxFreeDirtyPage = idxSlot;
1774
1775 pPool->cDirtyPages--;
1776 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1777 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1778 if (fFlush)
1779 {
1780 Assert(fAllowRemoval);
1781 Log(("Flush reused page table!\n"));
1782 pgmPoolFlushPage(pPool, pPage);
1783 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1784 }
1785 else
1786 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1787
1788#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1789 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1790#endif
1791}
1792
1793
1794# ifndef IN_RING3
1795/**
1796 * Add a new dirty page
1797 *
1798 * @param pVM Pointer to the VM.
1799 * @param pPool The pool.
1800 * @param pPage The page.
1801 */
1802void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1803{
1804 unsigned idxFree;
1805
1806 PGM_LOCK_ASSERT_OWNER(pVM);
1807 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1808 Assert(!pPage->fDirty);
1809
1810 idxFree = pPool->idxFreeDirtyPage;
1811 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1812 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1813
1814 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1815 {
1816 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1817 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1818 }
1819 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1820 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1821
1822 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1823
1824 /*
1825 * Make a copy of the guest page table as we require valid GCPhys addresses
1826 * when removing references to physical pages.
1827 * (The HCPhys linear lookup is *extremely* expensive!)
1828 */
1829 void *pvGst;
1830 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
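 /* Copy size: a PAE guest PT occupies a full page (512 entries of 8 bytes); for a 32-bit guest PT
    only half a page is copied here, presumably the 512 4-byte entries backing this 2 MB shadow PT. */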
1831 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1832# ifdef VBOX_STRICT
1833 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1834 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1835 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1836 else
1837 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1838 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1839# endif
1840 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1841
1842 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1843 pPage->fDirty = true;
1844 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1845 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1846 pPool->cDirtyPages++;
1847
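 /* Advance the free-slot hint; the AssertCompile above guarantees the array size is a power of two,
    so the mask gives a cheap wrap-around. If that slot is taken, scan for any free one below. */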
1848 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1849 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1850 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1851 {
1852 unsigned i;
1853 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1854 {
1855 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1856 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1857 {
1858 pPool->idxFreeDirtyPage = idxFree;
1859 break;
1860 }
1861 }
1862 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1863 }
1864
1865 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1866
1867 /*
1868 * Clear all references to this shadow table. See @bugref{7298}.
1869 */
1870 pgmPoolTrackClearPageUsers(pPool, pPage);
1871}
1872# endif /* !IN_RING3 */
1873
1874
1875/**
1876 * Check if the specified page is dirty (not write monitored)
1877 *
1878 * @returns true if the page is dirty, false if not.
1879 * @param pVM Pointer to the VM.
1880 * @param GCPhys Guest physical address
1881 */
1882bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1883{
1884 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1885 PGM_LOCK_ASSERT_OWNER(pVM);
1886 if (!pPool->cDirtyPages)
1887 return false;
1888
1889 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1890
1891 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1892 {
1893 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1894 {
1895 PPGMPOOLPAGE pPage;
1896 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1897
1898 pPage = &pPool->aPages[idxPage];
1899 if (pPage->GCPhys == GCPhys)
1900 return true;
1901 }
1902 }
1903 return false;
1904}
1905
1906
1907/**
1908 * Reset all dirty pages by reinstating page monitoring.
1909 *
1910 * @param pVM Pointer to the VM.
1911 */
1912void pgmPoolResetDirtyPages(PVM pVM)
1913{
1914 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1915 PGM_LOCK_ASSERT_OWNER(pVM);
1916 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1917
1918 if (!pPool->cDirtyPages)
1919 return;
1920
1921 Log(("pgmPoolResetDirtyPages\n"));
1922 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1923 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1924
1925 pPool->idxFreeDirtyPage = 0;
1926 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1927 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1928 {
1929 unsigned i;
1930 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1931 {
1932 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1933 {
1934 pPool->idxFreeDirtyPage = i;
1935 break;
1936 }
1937 }
1938 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1939 }
1940
1941 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1942 return;
1943}
1944
1945
1946/**
1947 * Invalidate the PT entry for the specified page
1948 *
1949 * @param pVM Pointer to the VM.
1950 * @param GCPtrPage Guest page to invalidate
1951 */
1952void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1953{
1954 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1955 PGM_LOCK_ASSERT_OWNER(pVM);
1956 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1957
1958 if (!pPool->cDirtyPages)
1959 return;
1960
1961 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
1962 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1963 {
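 /* Note: no per-slot invalidation is performed here; the loop body is empty. */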
1964 }
1965}
1966
1967
1968/**
1969 * Invalidate the dirty state of the page table at the given physical address, flushing the page if it is currently marked dirty.
1970 *
1971 * @param pVM Pointer to the VM.
1972 * @param GCPhysPT Physical address of the page table
1973 */
1974void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1975{
1976 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1977 PGM_LOCK_ASSERT_OWNER(pVM);
1978 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1979 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1980
1981 if (!pPool->cDirtyPages)
1982 return;
1983
1984 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1985
1986 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1987 {
1988 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1989 {
1990 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1991
1992 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1993 if (pPage->GCPhys == GCPhysPT)
1994 {
1995 idxDirtyPage = i;
1996 break;
1997 }
1998 }
1999 }
2000
2001 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2002 {
2003 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2004 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2005 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
2006 {
2007 unsigned i;
2008 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2009 {
2010 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2011 {
2012 pPool->idxFreeDirtyPage = i;
2013 break;
2014 }
2015 }
2016 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2017 }
2018 }
2019}
2020
2021# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2022
2023/**
2024 * Inserts a page into the GCPhys hash table.
2025 *
2026 * @param pPool The pool.
2027 * @param pPage The page.
2028 */
2029DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2030{
2031 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2032 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2033 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2034 pPage->iNext = pPool->aiHash[iHash];
2035 pPool->aiHash[iHash] = pPage->idx;
2036}
2037
2038
2039/**
2040 * Removes a page from the GCPhys hash table.
2041 *
2042 * @param pPool The pool.
2043 * @param pPage The page.
2044 */
2045DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2046{
2047 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2048 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2049 if (pPool->aiHash[iHash] == pPage->idx)
2050 pPool->aiHash[iHash] = pPage->iNext;
2051 else
2052 {
2053 uint16_t iPrev = pPool->aiHash[iHash];
2054 for (;;)
2055 {
2056 const int16_t i = pPool->aPages[iPrev].iNext;
2057 if (i == pPage->idx)
2058 {
2059 pPool->aPages[iPrev].iNext = pPage->iNext;
2060 break;
2061 }
2062 if (i == NIL_PGMPOOL_IDX)
2063 {
2064 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2065 break;
2066 }
2067 iPrev = i;
2068 }
2069 }
2070 pPage->iNext = NIL_PGMPOOL_IDX;
2071}
2072
2073
2074/**
2075 * Frees up one cache page.
2076 *
2077 * @returns VBox status code.
2078 * @retval VINF_SUCCESS on success.
2079 * @param pPool The pool.
2080 * @param iUser The user index.
2081 */
2082static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2083{
2084#ifndef IN_RC
2085 const PVM pVM = pPool->CTX_SUFF(pVM);
2086#endif
2087 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
2088 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2089
2090 /*
2091 * Select one page from the tail of the age list.
2092 */
2093 PPGMPOOLPAGE pPage;
2094 for (unsigned iLoop = 0; ; iLoop++)
2095 {
2096 uint16_t iToFree = pPool->iAgeTail;
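 /* Skip the caller's own page (iUser): flushing the very table that is about to reference
    the newly allocated page would be counterproductive. */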
2097 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2098 iToFree = pPool->aPages[iToFree].iAgePrev;
2099/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2100 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2101 {
2102 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2103 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2104 {
2105 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2106 continue;
2107 iToFree = i;
2108 break;
2109 }
2110 }
2111*/
2112 Assert(iToFree != iUser);
2113 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2114 pPage = &pPool->aPages[iToFree];
2115
2116 /*
2117 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2118 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2119 */
2120 if ( !pgmPoolIsPageLocked(pPage)
2121 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2122 break;
2123 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2124 pgmPoolCacheUsed(pPool, pPage);
2125 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2126 }
2127
2128 /*
2129 * Found a usable page, flush it and return.
2130 */
2131 int rc = pgmPoolFlushPage(pPool, pPage);
2132 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2133 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2134 if (rc == VINF_SUCCESS)
2135 PGM_INVL_ALL_VCPU_TLBS(pVM);
2136 return rc;
2137}
2138
2139
2140/**
2141 * Checks if a kind mismatch is really a page being reused
2142 * or if it's just normal remappings.
2143 *
2144 * @returns true if reused and the cached page (enmKind1) should be flushed
2145 * @returns false if not reused.
2146 * @param enmKind1 The kind of the cached page.
2147 * @param enmKind2 The kind of the requested page.
2148 */
2149static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2150{
2151 switch (enmKind1)
2152 {
2153 /*
2154 * Never reuse them. There is no remapping in non-paging mode.
2155 */
2156 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2157 case PGMPOOLKIND_32BIT_PD_PHYS:
2158 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2159 case PGMPOOLKIND_PAE_PD_PHYS:
2160 case PGMPOOLKIND_PAE_PDPT_PHYS:
2161 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2162 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2163 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2164 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2165 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2166 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2167 return false;
2168
2169 /*
2170 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2171 */
2172 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2173 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2174 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2175 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2176 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2177 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2178 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2179 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2180 case PGMPOOLKIND_32BIT_PD:
2181 case PGMPOOLKIND_PAE_PDPT:
2182 switch (enmKind2)
2183 {
2184 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2185 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2186 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2187 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2188 case PGMPOOLKIND_64BIT_PML4:
2189 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2190 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2191 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2192 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2193 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2194 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2195 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2196 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2197 return true;
2198 default:
2199 return false;
2200 }
2201
2202 /*
2203 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2204 */
2205 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2206 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2207 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2208 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2209 case PGMPOOLKIND_64BIT_PML4:
2210 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2211 switch (enmKind2)
2212 {
2213 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2214 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2215 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2216 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2217 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2218 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2219 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2220 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2221 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2222 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2223 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2224 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2225 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2226 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2227 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2228 return true;
2229 default:
2230 return false;
2231 }
2232
2233 /*
2234 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2235 */
2236 case PGMPOOLKIND_ROOT_NESTED:
2237 return false;
2238
2239 default:
2240 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2241 }
2242}
2243
2244
2245/**
2246 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2247 *
2248 * @returns VBox status code.
2249 * @retval VINF_PGM_CACHED_PAGE on success.
2250 * @retval VERR_FILE_NOT_FOUND if not found.
2251 * @param pPool The pool.
2252 * @param GCPhys The GC physical address of the page we're going to shadow.
2253 * @param enmKind The kind of mapping.
2254 * @param enmAccess Access type for the mapping (only relevant for big pages)
2255 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2256 * @param iUser The shadow page pool index of the user table. This is
2257 * NIL_PGMPOOL_IDX for root pages.
2258 * @param iUserTable The index into the user table (shadowed). Ignored if
2259 * root page
2260 * @param ppPage Where to store the pointer to the page.
2261 */
2262static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2263 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2264{
2265 /*
2266 * Look up the GCPhys in the hash.
2267 */
2268 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2269 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2270 if (i != NIL_PGMPOOL_IDX)
2271 {
2272 do
2273 {
2274 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2275 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2276 if (pPage->GCPhys == GCPhys)
2277 {
2278 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2279 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2280 && pPage->fA20Enabled == fA20Enabled)
2281 {
2282 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2283 * doesn't flush it in case there are no more free use records.
2284 */
2285 pgmPoolCacheUsed(pPool, pPage);
2286
2287 int rc = VINF_SUCCESS;
2288 if (iUser != NIL_PGMPOOL_IDX)
2289 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2290 if (RT_SUCCESS(rc))
2291 {
2292 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2293 *ppPage = pPage;
2294 if (pPage->cModifications)
2295 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2296 STAM_COUNTER_INC(&pPool->StatCacheHits);
2297 return VINF_PGM_CACHED_PAGE;
2298 }
2299 return rc;
2300 }
2301
2302 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2303 {
2304 /*
2305 * The kind is different. In some cases we should now flush the page
2306 * as it has been reused, but in most cases this is normal remapping
2307 * of PDs as PT or big pages using the GCPhys field in a slightly
2308 * different way than the other kinds.
2309 */
2310 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2311 {
2312 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2313 pgmPoolFlushPage(pPool, pPage);
2314 break;
2315 }
2316 }
2317 }
2318
2319 /* next */
2320 i = pPage->iNext;
2321 } while (i != NIL_PGMPOOL_IDX);
2322 }
2323
2324 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2325 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2326 return VERR_FILE_NOT_FOUND;
2327}
2328
2329
2330/**
2331 * Inserts a page into the cache.
2332 *
2333 * @param pPool The pool.
2334 * @param pPage The cached page.
2335 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2336 */
2337static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2338{
2339 /*
2340 * Insert into the GCPhys hash if the page is fit for that.
2341 */
2342 Assert(!pPage->fCached);
2343 if (fCanBeCached)
2344 {
2345 pPage->fCached = true;
2346 pgmPoolHashInsert(pPool, pPage);
2347 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2348 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2349 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2350 }
2351 else
2352 {
2353 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2354 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2355 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2356 }
2357
2358 /*
2359 * Insert at the head of the age list.
2360 */
2361 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2362 pPage->iAgeNext = pPool->iAgeHead;
2363 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2364 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2365 else
2366 pPool->iAgeTail = pPage->idx;
2367 pPool->iAgeHead = pPage->idx;
2368}
2369
2370
2371/**
2372 * Flushes a cached page.
2373 *
2374 * @param pPool The pool.
2375 * @param pPage The cached page.
2376 */
2377static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2378{
2379 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2380
2381 /*
2382 * Remove the page from the hash.
2383 */
2384 if (pPage->fCached)
2385 {
2386 pPage->fCached = false;
2387 pgmPoolHashRemove(pPool, pPage);
2388 }
2389 else
2390 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2391
2392 /*
2393 * Remove it from the age list.
2394 */
2395 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2396 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2397 else
2398 pPool->iAgeTail = pPage->iAgePrev;
2399 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2400 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2401 else
2402 pPool->iAgeHead = pPage->iAgeNext;
2403 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2404 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2405}
2406
2407
2408/**
2409 * Looks for pages sharing the monitor.
2410 *
2411 * @returns Pointer to the head page.
2412 * @returns NULL if not found.
2413 * @param pPool The pool.
2414 * @param pNewPage The page which is going to be monitored.
2415 */
2416static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2417{
2418 /*
2419 * Look up the GCPhys in the hash.
2420 */
2421 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2422 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2423 if (i == NIL_PGMPOOL_IDX)
2424 return NULL;
2425 do
2426 {
2427 PPGMPOOLPAGE pPage = &pPool->aPages[i];
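 /* GCPhys is page aligned while pPage->GCPhys may store a sub-page offset for some kinds,
    so the unsigned subtraction matches any pool page belonging to the same 4 KB guest frame. */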
2428 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2429 && pPage != pNewPage)
2430 {
2431 switch (pPage->enmKind)
2432 {
2433 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2434 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2435 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2436 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2437 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2438 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2439 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2440 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2441 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2442 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2443 case PGMPOOLKIND_64BIT_PML4:
2444 case PGMPOOLKIND_32BIT_PD:
2445 case PGMPOOLKIND_PAE_PDPT:
2446 {
2447 /* find the head */
2448 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2449 {
2450 Assert(pPage->iMonitoredPrev != pPage->idx);
2451 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2452 }
2453 return pPage;
2454 }
2455
2456 /* ignore, no monitoring. */
2457 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2458 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2459 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2460 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2461 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2462 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2463 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2464 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2465 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2466 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2467 case PGMPOOLKIND_ROOT_NESTED:
2468 case PGMPOOLKIND_PAE_PD_PHYS:
2469 case PGMPOOLKIND_PAE_PDPT_PHYS:
2470 case PGMPOOLKIND_32BIT_PD_PHYS:
2471 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2472 break;
2473 default:
2474 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2475 }
2476 }
2477
2478 /* next */
2479 i = pPage->iNext;
2480 } while (i != NIL_PGMPOOL_IDX);
2481 return NULL;
2482}
2483
2484
2485/**
2486 * Enables write monitoring of a guest page.
2487 *
2488 * @returns VBox status code.
2489 * @retval VINF_SUCCESS on success.
2490 * @param pPool The pool.
2491 * @param pPage The cached page.
2492 */
2493static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2494{
2495 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2496
2497 /*
2498 * Filter out the relevant kinds.
2499 */
2500 switch (pPage->enmKind)
2501 {
2502 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2503 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2504 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2505 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2506 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2507 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2508 case PGMPOOLKIND_64BIT_PML4:
2509 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2510 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2511 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2512 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2513 case PGMPOOLKIND_32BIT_PD:
2514 case PGMPOOLKIND_PAE_PDPT:
2515 break;
2516
2517 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2518 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2519 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2520 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2521 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2522 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2523 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2524 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2525 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2526 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2527 case PGMPOOLKIND_ROOT_NESTED:
2528 /* Nothing to monitor here. */
2529 return VINF_SUCCESS;
2530
2531 case PGMPOOLKIND_32BIT_PD_PHYS:
2532 case PGMPOOLKIND_PAE_PDPT_PHYS:
2533 case PGMPOOLKIND_PAE_PD_PHYS:
2534 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2535 /* Nothing to monitor here. */
2536 return VINF_SUCCESS;
2537 default:
2538 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2539 }
2540
2541 /*
2542 * Install handler.
2543 */
2544 int rc;
2545 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2546 if (pPageHead)
2547 {
2548 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2549 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2550
2551#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2552 if (pPageHead->fDirty)
2553 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2554#endif
2555
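 /* Another page already monitors this guest page; link this one into the chain right after
    the head so they all share the head's physical access handler. */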
2556 pPage->iMonitoredPrev = pPageHead->idx;
2557 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2558 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2559 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2560 pPageHead->iMonitoredNext = pPage->idx;
2561 rc = VINF_SUCCESS;
2562 }
2563 else
2564 {
2565 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2566 PVM pVM = pPool->CTX_SUFF(pVM);
2567 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2568 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2569 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2570 NIL_RTR3PTR /*pszDesc*/);
2571 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2572 * the heap size should suffice. */
2573 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2574 PVMCPU pVCpu = VMMGetCpu(pVM);
2575 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2576 }
2577 pPage->fMonitored = true;
2578 return rc;
2579}
2580
2581
2582/**
2583 * Disables write monitoring of a guest page.
2584 *
2585 * @returns VBox status code.
2586 * @retval VINF_SUCCESS on success.
2587 * @param pPool The pool.
2588 * @param pPage The cached page.
2589 */
2590static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2591{
2592 /*
2593 * Filter out the relevant kinds.
2594 */
2595 switch (pPage->enmKind)
2596 {
2597 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2598 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2599 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2600 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2601 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2602 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2603 case PGMPOOLKIND_64BIT_PML4:
2604 case PGMPOOLKIND_32BIT_PD:
2605 case PGMPOOLKIND_PAE_PDPT:
2606 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2607 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2608 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2609 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2610 break;
2611
2612 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2613 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2614 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2615 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2616 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2617 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2618 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2619 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2620 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2621 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2622 case PGMPOOLKIND_ROOT_NESTED:
2623 case PGMPOOLKIND_PAE_PD_PHYS:
2624 case PGMPOOLKIND_PAE_PDPT_PHYS:
2625 case PGMPOOLKIND_32BIT_PD_PHYS:
2626 /* Nothing to monitor here. */
2627 Assert(!pPage->fMonitored);
2628 return VINF_SUCCESS;
2629
2630 default:
2631 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2632 }
2633 Assert(pPage->fMonitored);
2634
2635 /*
2636 * Remove the page from the monitored list or uninstall it if last.
2637 */
2638 const PVM pVM = pPool->CTX_SUFF(pVM);
2639 int rc;
2640 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2641 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2642 {
2643 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2644 {
2645 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2646 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2647 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, MMHyperCCToR3(pVM, pNewHead),
2648 MMHyperCCToR0(pVM, pNewHead), MMHyperCCToRC(pVM, pNewHead));
2649
2650 AssertFatalRCSuccess(rc);
2651 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2652 }
2653 else
2654 {
2655 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2656 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2657 {
2658 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2659 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2660 }
2661 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2662 rc = VINF_SUCCESS;
2663 }
2664 }
2665 else
2666 {
2667 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2668 AssertFatalRC(rc);
2669 PVMCPU pVCpu = VMMGetCpu(pVM);
2670 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2671 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2672 }
2673 pPage->fMonitored = false;
2674
2675 /*
2676 * Remove it from the list of modified pages (if in it).
2677 */
2678 pgmPoolMonitorModifiedRemove(pPool, pPage);
2679
2680 return rc;
2681}
2682
2683
2684/**
2685 * Inserts the page into the list of modified pages.
2686 *
2687 * @param pPool The pool.
2688 * @param pPage The page.
2689 */
2690void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2691{
2692 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2693 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2694 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2695 && pPool->iModifiedHead != pPage->idx,
2696 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2697 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2698 pPool->iModifiedHead, pPool->cModifiedPages));
2699
2700 pPage->iModifiedNext = pPool->iModifiedHead;
2701 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2702 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2703 pPool->iModifiedHead = pPage->idx;
2704 pPool->cModifiedPages++;
2705#ifdef VBOX_WITH_STATISTICS
2706 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2707 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2708#endif
2709}
2710
2711
2712/**
2713 * Removes the page from the list of modified pages and resets the
2714 * modification counter.
2715 *
2716 * @param pPool The pool.
2717 * @param pPage The page which is believed to be in the list of modified pages.
2718 */
2719static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2720{
2721 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2722 if (pPool->iModifiedHead == pPage->idx)
2723 {
2724 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2725 pPool->iModifiedHead = pPage->iModifiedNext;
2726 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2727 {
2728 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2729 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2730 }
2731 pPool->cModifiedPages--;
2732 }
2733 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2734 {
2735 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2736 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2737 {
2738 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2739 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2740 }
2741 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2742 pPool->cModifiedPages--;
2743 }
2744 else
2745 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2746 pPage->cModifications = 0;
2747}
2748
2749
2750/**
2751 * Zaps the list of modified pages, resetting their modification counters in the process.
2752 *
2753 * @param pVM Pointer to the VM.
2754 */
2755static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2756{
2757 pgmLock(pVM);
2758 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2759 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2760
2761 unsigned cPages = 0; NOREF(cPages);
2762
2763#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2764 pgmPoolResetDirtyPages(pVM);
2765#endif
2766
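 /* Detach the whole modified list up front, then walk the detached chain resetting each page. */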
2767 uint16_t idx = pPool->iModifiedHead;
2768 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2769 while (idx != NIL_PGMPOOL_IDX)
2770 {
2771 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2772 idx = pPage->iModifiedNext;
2773 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2774 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2775 pPage->cModifications = 0;
2776 Assert(++cPages);
2777 }
2778 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2779 pPool->cModifiedPages = 0;
2780 pgmUnlock(pVM);
2781}
2782
2783
2784/**
2785 * Handle SyncCR3 pool tasks
2786 *
2787 * @returns VBox status code.
2788 * @retval VINF_SUCCESS on success.
2789 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2790 * @param pVCpu Pointer to the VMCPU.
2791 * @remark Should only be used when monitoring is available, thus placed in
2792 * the PGMPOOL_WITH_MONITORING #ifdef.
2793 */
2794int pgmPoolSyncCR3(PVMCPU pVCpu)
2795{
2796 PVM pVM = pVCpu->CTX_SUFF(pVM);
2797 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2798
2799 /*
2800 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2801 * Occasionally we will have to clear all the shadow page tables because we wanted
2802 * to monitor a page which was mapped by too many shadowed page tables. This operation is
2803 * sometimes referred to as a 'lightweight flush'.
2804 */
2805# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2806 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2807 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2808# else /* !IN_RING3 */
2809 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2810 {
2811 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2812 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2813
2814 /* Make sure all other VCPUs return to ring 3. */
2815 if (pVM->cCpus > 1)
2816 {
2817 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2818 PGM_INVL_ALL_VCPU_TLBS(pVM);
2819 }
2820 return VINF_PGM_SYNC_CR3;
2821 }
2822# endif /* !IN_RING3 */
2823 else
2824 {
2825 pgmPoolMonitorModifiedClearAll(pVM);
2826
2827 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2828 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2829 {
2830 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2831 return pgmPoolSyncCR3(pVCpu);
2832 }
2833 }
2834 return VINF_SUCCESS;
2835}
2836
2837
2838/**
2839 * Frees up at least one user entry.
2840 *
2841 * @returns VBox status code.
2842 * @retval VINF_SUCCESS on success.
2843 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2844 * @param pPool The pool.
2845 * @param iUser The user index.
2846 */
2847static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2848{
2849 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2850 /*
2851 * Just free cached pages in a braindead fashion.
2852 */
2853 /** @todo walk the age list backwards and free the first with usage. */
2854 int rc = VINF_SUCCESS;
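 /* Each flushed cache page returns its user records to the free list, so keep flushing
    until at least one user entry is available. */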
2855 do
2856 {
2857 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2858 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2859 rc = rc2;
2860 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2861 return rc;
2862}
2863
2864
2865/**
2866 * Inserts a page into the cache.
2867 *
2868 * This will create a user node for the page, insert it into the GCPhys
2869 * hash, and insert it into the age list.
2870 *
2871 * @returns VBox status code.
2872 * @retval VINF_SUCCESS if successfully added.
2873 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2874 * @param pPool The pool.
2875 * @param pPage The cached page.
2876 * @param GCPhys The GC physical address of the page we're going to shadow.
2877 * @param iUser The user index.
2878 * @param iUserTable The user table index.
2879 */
2880DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2881{
2882 int rc = VINF_SUCCESS;
2883 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2884
2885 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2886
2887 if (iUser != NIL_PGMPOOL_IDX)
2888 {
2889#ifdef VBOX_STRICT
2890 /*
2891 * Check that the entry doesn't already exist.
2892 */
2893 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2894 {
2895 uint16_t i = pPage->iUserHead;
2896 do
2897 {
2898 Assert(i < pPool->cMaxUsers);
2899 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2900 i = paUsers[i].iNext;
2901 } while (i != NIL_PGMPOOL_USER_INDEX);
2902 }
2903#endif
2904
2905 /*
2906 * Find a free user node.
2907 */
2908 uint16_t i = pPool->iUserFreeHead;
2909 if (i == NIL_PGMPOOL_USER_INDEX)
2910 {
2911 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2912 if (RT_FAILURE(rc))
2913 return rc;
2914 i = pPool->iUserFreeHead;
2915 }
2916
2917 /*
2918 * Unlink the user node from the free list,
2919 * initialize and insert it into the user list.
2920 */
2921 pPool->iUserFreeHead = paUsers[i].iNext;
2922 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2923 paUsers[i].iUser = iUser;
2924 paUsers[i].iUserTable = iUserTable;
2925 pPage->iUserHead = i;
2926 }
2927 else
2928 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2929
2930
2931 /*
2932 * Insert into cache and enable monitoring of the guest page if enabled.
2933 *
2934 * Until we implement caching of all levels, including the CR3 one, we'll
2935 * have to make sure we don't try to monitor & cache any recursive reuse of
2936 * a monitored CR3 page. Because all Windows versions are doing this we'll
2937 * have to be able to do combined access monitoring, CR3 + PT and
2938 * PD + PT (guest PAE).
2939 *
2940 * Update:
2941 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2942 */
2943 const bool fCanBeMonitored = true;
2944 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2945 if (fCanBeMonitored)
2946 {
2947 rc = pgmPoolMonitorInsert(pPool, pPage);
2948 AssertRC(rc);
2949 }
2950 return rc;
2951}
2952
2953
2954/**
2955 * Adds a user reference to a page.
2956 *
2957 * This will move the page to the head of the
2958 *
2959 * @returns VBox status code.
2960 * @retval VINF_SUCCESS if successfully added.
2961 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2962 * @param pPool The pool.
2963 * @param pPage The cached page.
2964 * @param iUser The user index.
2965 * @param iUserTable The user table.
2966 */
2967static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2968{
2969 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2970 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2971 Assert(iUser != NIL_PGMPOOL_IDX);
2972
2973# ifdef VBOX_STRICT
2974 /*
2975 * Check that the entry doesn't already exist. We only allow multiple
2976 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2977 */
2978 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2979 {
2980 uint16_t i = pPage->iUserHead;
2981 do
2982 {
2983 Assert(i < pPool->cMaxUsers);
2984 /** @todo this assertion looks odd... Shouldn't it be && here? */
2985 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2986 i = paUsers[i].iNext;
2987 } while (i != NIL_PGMPOOL_USER_INDEX);
2988 }
2989# endif
2990
2991 /*
2992 * Allocate a user node.
2993 */
2994 uint16_t i = pPool->iUserFreeHead;
2995 if (i == NIL_PGMPOOL_USER_INDEX)
2996 {
2997 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2998 if (RT_FAILURE(rc))
2999 return rc;
3000 i = pPool->iUserFreeHead;
3001 }
3002 pPool->iUserFreeHead = paUsers[i].iNext;
3003
3004 /*
3005 * Initialize the user node and insert it.
3006 */
3007 paUsers[i].iNext = pPage->iUserHead;
3008 paUsers[i].iUser = iUser;
3009 paUsers[i].iUserTable = iUserTable;
3010 pPage->iUserHead = i;
3011
3012# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3013 if (pPage->fDirty)
3014 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3015# endif
3016
3017 /*
3018 * Tell the cache to update its replacement stats for this page.
3019 */
3020 pgmPoolCacheUsed(pPool, pPage);
3021 return VINF_SUCCESS;
3022}
3023
3024
3025/**
3026 * Frees a user record associated with a page.
3027 *
3028 * This does not clear the entry in the user table, it simply returns the
3029 * user record to the chain of free records.
3030 *
3031 * @param pPool The pool.
3032 * @param pPage The shadow page.
3033 * @param iUser The shadow page pool index of the user table.
3034 * @param iUserTable The index into the user table (shadowed).
3035 *
3036 * @remarks Don't call this for root pages.
3037 */
3038static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3039{
3040 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3041 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3042 Assert(iUser != NIL_PGMPOOL_IDX);
3043
3044 /*
3045 * Unlink and free the specified user entry.
3046 */
3047
3048 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3049 uint16_t i = pPage->iUserHead;
3050 if ( i != NIL_PGMPOOL_USER_INDEX
3051 && paUsers[i].iUser == iUser
3052 && paUsers[i].iUserTable == iUserTable)
3053 {
3054 pPage->iUserHead = paUsers[i].iNext;
3055
3056 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3057 paUsers[i].iNext = pPool->iUserFreeHead;
3058 pPool->iUserFreeHead = i;
3059 return;
3060 }
3061
3062 /* General: Linear search. */
3063 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3064 while (i != NIL_PGMPOOL_USER_INDEX)
3065 {
3066 if ( paUsers[i].iUser == iUser
3067 && paUsers[i].iUserTable == iUserTable)
3068 {
3069 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3070 paUsers[iPrev].iNext = paUsers[i].iNext;
3071 else
3072 pPage->iUserHead = paUsers[i].iNext;
3073
3074 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3075 paUsers[i].iNext = pPool->iUserFreeHead;
3076 pPool->iUserFreeHead = i;
3077 return;
3078 }
3079 iPrev = i;
3080 i = paUsers[i].iNext;
3081 }
3082
3083 /* Fatal: didn't find it */
3084 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3085 iUser, iUserTable, pPage->GCPhys));
3086}
3087
3088
3089/**
3090 * Gets the entry size of a shadow table.
3091 *
3092 * @param enmKind The kind of page.
3093 *
3094 * @returns The size of the entry in bytes. That is, 4 or 8.
3095 * @returns If the kind is not for a table, an assertion is raised and 0 is
3096 * returned.
3097 */
3098DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3099{
3100 switch (enmKind)
3101 {
3102 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3103 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3104 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3105 case PGMPOOLKIND_32BIT_PD:
3106 case PGMPOOLKIND_32BIT_PD_PHYS:
3107 return 4;
3108
3109 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3110 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3111 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3112 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3113 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3114 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3115 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3116 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3117 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3118 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3119 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3120 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3121 case PGMPOOLKIND_64BIT_PML4:
3122 case PGMPOOLKIND_PAE_PDPT:
3123 case PGMPOOLKIND_ROOT_NESTED:
3124 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3125 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3126 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3127 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3128 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3129 case PGMPOOLKIND_PAE_PD_PHYS:
3130 case PGMPOOLKIND_PAE_PDPT_PHYS:
3131 return 8;
3132
3133 default:
3134 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3135 }
3136}
3137
3138
3139/**
3140 * Gets the entry size of a guest table.
3141 *
3142 * @param enmKind The kind of page.
3143 *
3144 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3145 * @returns If the kind is not for a table, an assertion is raised and 0 is
3146 * returned.
3147 */
3148DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3149{
3150 switch (enmKind)
3151 {
3152 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3153 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3154 case PGMPOOLKIND_32BIT_PD:
3155 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3156 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3157 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3158 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3159 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3160 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3161 return 4;
3162
3163 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3164 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3165 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3166 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3167 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3168 case PGMPOOLKIND_64BIT_PML4:
3169 case PGMPOOLKIND_PAE_PDPT:
3170 return 8;
3171
3172 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3173 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3174 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3175 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3176 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3177 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3178 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3179 case PGMPOOLKIND_ROOT_NESTED:
3180 case PGMPOOLKIND_PAE_PD_PHYS:
3181 case PGMPOOLKIND_PAE_PDPT_PHYS:
3182 case PGMPOOLKIND_32BIT_PD_PHYS:
3183 /** @todo can we return 0? (nobody is calling this...) */
3184 AssertFailed();
3185 return 0;
3186
3187 default:
3188 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3189 }
3190}
3191
3192
3193/**
3194 * Checks one shadow page table entry for a mapping of a physical page.
3195 *
3196 * @returns true / false indicating removal of all relevant PTEs
3197 *
3198 * @param pVM Pointer to the VM.
3199 * @param pPhysPage The guest page in question.
3200 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3201 * @param iShw The pool index of the shadow page table.
3202 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3203 */
3204static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3205{
3206 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3207 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3208 bool fRet = false;
3209
3210 /*
3211 * Assert sanity.
3212 */
3213 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3214 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3215 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3216
3217 /*
3218 * Then, clear the actual mappings to the page in the shadow PT.
3219 */
3220 switch (pPage->enmKind)
3221 {
3222 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3223 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3224 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3225 {
3226 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3227 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3228 uint32_t u32AndMask = 0;
3229 uint32_t u32OrMask = 0;
3230
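            /* When the caller allows the PTE to be kept, only the RW bit is adjusted
               below according to the physical handler state; otherwise the masks stay
               zero and the entry ends up being cleared entirely. */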
3231 if (!fFlushPTEs)
3232 {
3233 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3234 {
3235                    case PGM_PAGE_HNDL_PHYS_STATE_NONE:         /* No handler installed. */
3236                    case PGM_PAGE_HNDL_PHYS_STATE_DISABLED:     /* Monitoring is temporarily disabled. */
3237 u32OrMask = X86_PTE_RW;
3238 u32AndMask = UINT32_MAX;
3239 fRet = true;
3240 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3241 break;
3242
3243 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3244 u32OrMask = 0;
3245 u32AndMask = ~X86_PTE_RW;
3246 fRet = true;
3247 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3248 break;
3249 default:
3250 /* (shouldn't be here, will assert below) */
3251 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3252 break;
3253 }
3254 }
3255 else
3256 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3257
3258 /* Update the counter if we're removing references. */
3259 if (!u32AndMask)
3260 {
3261 Assert(pPage->cPresent);
3262 Assert(pPool->cPresent);
3263 pPage->cPresent--;
3264 pPool->cPresent--;
3265 }
3266
3267 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3268 {
3269 X86PTE Pte;
3270
3271 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3272 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3273 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3274 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3275
3276 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3277 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3278 return fRet;
3279 }
3280#ifdef LOG_ENABLED
3281 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3282 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3283 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3284 {
3285 Log(("i=%d cFound=%d\n", i, ++cFound));
3286 }
3287#endif
3288 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3289 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3290 break;
3291 }
3292
3293 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3294 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3295 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3296 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3297 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3298 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3299 {
3300 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3301 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3302 uint64_t u64OrMask = 0;
3303 uint64_t u64AndMask = 0;
3304
3305 if (!fFlushPTEs)
3306 {
3307 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3308 {
3309 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3310 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3311 u64OrMask = X86_PTE_RW;
3312 u64AndMask = UINT64_MAX;
3313 fRet = true;
3314 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3315 break;
3316
3317 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3318 u64OrMask = 0;
3319 u64AndMask = ~(uint64_t)X86_PTE_RW;
3320 fRet = true;
3321 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3322 break;
3323
3324 default:
3325 /* (shouldn't be here, will assert below) */
3326 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3327 break;
3328 }
3329 }
3330 else
3331 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3332
3333 /* Update the counter if we're removing references. */
3334 if (!u64AndMask)
3335 {
3336 Assert(pPage->cPresent);
3337 Assert(pPool->cPresent);
3338 pPage->cPresent--;
3339 pPool->cPresent--;
3340 }
3341
3342 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3343 {
3344 X86PTEPAE Pte;
3345
3346 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3347 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3348 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3349 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3350
3351 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3352 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3353 return fRet;
3354 }
3355#ifdef LOG_ENABLED
3356 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3357 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3358 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3359 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3360 Log(("i=%d cFound=%d\n", i, ++cFound));
3361#endif
3362 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3363 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3364 break;
3365 }
3366
3367#ifdef PGM_WITH_LARGE_PAGES
3368 /* Large page case only. */
3369 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3370 {
3371 Assert(pVM->pgm.s.fNestedPaging);
3372
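            /* Large page entries are always cleared outright here; there is no
               keep-and-adjust path like the one used for regular PTEs above. */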
3373 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3374 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3375
3376 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3377 {
3378 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3379 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3380 pPD->a[iPte].u = 0;
3381 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3382
3383 /* Update the counter as we're removing references. */
3384 Assert(pPage->cPresent);
3385 Assert(pPool->cPresent);
3386 pPage->cPresent--;
3387 pPool->cPresent--;
3388
3389 return fRet;
3390 }
3391# ifdef LOG_ENABLED
3392 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3393 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3394 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3395 Log(("i=%d cFound=%d\n", i, ++cFound));
3396# endif
3397 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3398 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3399 break;
3400 }
3401
3402 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3403 case PGMPOOLKIND_PAE_PD_PHYS:
3404 {
3405 Assert(pVM->pgm.s.fNestedPaging);
3406
3407 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3408 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3409
3410 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3411 {
3412 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3413 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3414 pPD->a[iPte].u = 0;
3415 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3416
3417 /* Update the counter as we're removing references. */
3418 Assert(pPage->cPresent);
3419 Assert(pPool->cPresent);
3420 pPage->cPresent--;
3421 pPool->cPresent--;
3422 return fRet;
3423 }
3424# ifdef LOG_ENABLED
3425 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3426 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3427 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3428 Log(("i=%d cFound=%d\n", i, ++cFound));
3429# endif
3430 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3431 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3432 break;
3433 }
3434#endif /* PGM_WITH_LARGE_PAGES */
3435
3436 default:
3437 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3438 }
3439
3440 /* not reached. */
3441#ifndef _MSC_VER
3442 return fRet;
3443#endif
3444}
3445
3446
3447/**
3448 * Scans one shadow page table for mappings of a physical page.
3449 *
3450 * @param pVM Pointer to the VM.
3451 * @param pPhysPage The guest page in question.
3452 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3453 * @param iShw The shadow page table.
3454 */
3455static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3456{
3457 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3458
3459    /* We should only come here when there's only one reference to this physical page. */
3460 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3461
3462 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3463 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3464 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3465 if (!fKeptPTEs)
3466 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3467 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3468}
3469
3470
3471/**
3472 * Flushes a list of shadow page tables mapping the same physical page.
3473 *
3474 * @param pVM Pointer to the VM.
3475 * @param pPhysPage The guest page in question.
3476 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3477 * @param iPhysExt The physical cross reference extent list to flush.
3478 */
3479static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3480{
3481 PGM_LOCK_ASSERT_OWNER(pVM);
3482 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3483 bool fKeepList = false;
3484
3485 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3486    Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3487
3488 const uint16_t iPhysExtStart = iPhysExt;
3489 PPGMPOOLPHYSEXT pPhysExt;
3490 do
3491 {
3492 Assert(iPhysExt < pPool->cMaxPhysExts);
3493 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3494 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3495 {
3496 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3497 {
3498 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3499 if (!fKeptPTEs)
3500 {
3501 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3502 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3503 }
3504 else
3505 fKeepList = true;
3506 }
3507 }
3508 /* next */
3509 iPhysExt = pPhysExt->iNext;
3510 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3511
3512 if (!fKeepList)
3513 {
3514 /* insert the list into the free list and clear the ram range entry. */
3515 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3516 pPool->iPhysExtFreeHead = iPhysExtStart;
3517 /* Invalidate the tracking data. */
3518 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3519 }
3520
3521 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3522}
3523
3524
3525/**
3526 * Flushes all shadow page table mappings of the given guest page.
3527 *
3528 * This is typically called when the host page backing the guest one has been
3529 * replaced or when the page protection was changed due to a guest access
3530 * caught by the monitoring.
3531 *
3532 * @returns VBox status code.
3533 * @retval VINF_SUCCESS if all references have been successfully cleared.
3534 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3535 * pool cleaning. FF and sync flags are set.
3536 *
3537 * @param pVM Pointer to the VM.
3538 * @param GCPhysPage GC physical address of the page in question
3539 * @param pPhysPage The guest page in question.
3540 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3541 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3542 * flushed, it is NOT touched if this isn't necessary.
3543 *                      The caller MUST initialize this to @a false.
3544 */
3545int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3546{
3547 PVMCPU pVCpu = VMMGetCpu(pVM);
3548 pgmLock(pVM);
3549 int rc = VINF_SUCCESS;
3550
3551#ifdef PGM_WITH_LARGE_PAGES
3552 /* Is this page part of a large page? */
3553 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3554 {
3555 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3556 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3557
3558 /* Fetch the large page base. */
3559 PPGMPAGE pLargePage;
3560 if (GCPhysBase != GCPhysPage)
3561 {
3562 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3563 AssertFatal(pLargePage);
3564 }
3565 else
3566 pLargePage = pPhysPage;
3567
3568 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3569
3570 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3571 {
3572 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3573 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3574 pVM->pgm.s.cLargePagesDisabled++;
3575
3576 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3577 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3578
3579 *pfFlushTLBs = true;
3580 pgmUnlock(pVM);
3581 return rc;
3582 }
3583 }
3584#else
3585 NOREF(GCPhysPage);
3586#endif /* PGM_WITH_LARGE_PAGES */
3587
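    /* The 16-bit tracking word packs a reference count together with either a pool
       page index (single reference) or a physical cross reference extent index; see
       the PGMPOOL_TD_GET_CREFS / PGMPOOL_TD_GET_IDX uses below. */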
3588 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3589 if (u16)
3590 {
3591 /*
3592 * The zero page is currently screwing up the tracking and we'll
3593 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3594 * is defined, zero pages won't normally be mapped. Some kind of solution
3595 * will be needed for this problem of course, but it will have to wait...
3596 */
3597 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3598 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3599 rc = VINF_PGM_GCPHYS_ALIASED;
3600 else
3601 {
3602# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3603 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3604 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3605 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3606# endif
3607
3608 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3609 {
3610 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3611 pgmPoolTrackFlushGCPhysPT(pVM,
3612 pPhysPage,
3613 fFlushPTEs,
3614 PGMPOOL_TD_GET_IDX(u16));
3615 }
3616 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3617 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3618 else
3619 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3620 *pfFlushTLBs = true;
3621
3622# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3623 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3624# endif
3625 }
3626 }
3627
3628 if (rc == VINF_PGM_GCPHYS_ALIASED)
3629 {
3630 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3631 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3632 rc = VINF_PGM_SYNC_CR3;
3633 }
3634 pgmUnlock(pVM);
3635 return rc;
3636}
3637
3638
3639/**
3640 * Scans all shadow page tables for mappings of a physical page.
3641 *
3642 * This may be slow, but it's most likely more efficient than cleaning
3643 * out the entire page pool / cache.
3644 *
3645 * @returns VBox status code.
3646 * @retval VINF_SUCCESS if all references have been successfully cleared.
3647 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3648 * a page pool cleaning.
3649 *
3650 * @param pVM Pointer to the VM.
3651 * @param pPhysPage The guest page in question.
3652 */
3653int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3654{
3655 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3656 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3657 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3658 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3659
3660 /*
3661 * There is a limit to what makes sense.
3662 */
3663 if ( pPool->cPresent > 1024
3664 && pVM->cCpus == 1)
3665 {
3666 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3667 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3668 return VINF_PGM_GCPHYS_ALIASED;
3669 }
3670
3671 /*
3672     * Iterate all the pages until we've encountered all that are in use.
3673     * This is a simple but not quite optimal solution.
3674 */
3675 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3676 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3677 unsigned cLeft = pPool->cUsedPages;
3678 unsigned iPage = pPool->cCurPages;
3679 while (--iPage >= PGMPOOL_IDX_FIRST)
3680 {
3681 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3682 if ( pPage->GCPhys != NIL_RTGCPHYS
3683 && pPage->cPresent)
3684 {
3685 switch (pPage->enmKind)
3686 {
3687 /*
3688 * We only care about shadow page tables.
3689 */
3690 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3691 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3692 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3693 {
3694 unsigned cPresent = pPage->cPresent;
3695 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3696 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3697 if (pPT->a[i].n.u1Present)
3698 {
3699 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3700 {
3701 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3702 pPT->a[i].u = 0;
3703
3704 /* Update the counter as we're removing references. */
3705 Assert(pPage->cPresent);
3706 Assert(pPool->cPresent);
3707 pPage->cPresent--;
3708 pPool->cPresent--;
3709 }
3710 if (!--cPresent)
3711 break;
3712 }
3713 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3714 break;
3715 }
3716
3717 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3718 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3719 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3720 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3721 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3722 {
3723 unsigned cPresent = pPage->cPresent;
3724 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3725 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3726 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3727 {
3728 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3729 {
3730 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3731 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3732
3733 /* Update the counter as we're removing references. */
3734 Assert(pPage->cPresent);
3735 Assert(pPool->cPresent);
3736 pPage->cPresent--;
3737 pPool->cPresent--;
3738 }
3739 if (!--cPresent)
3740 break;
3741 }
3742 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3743 break;
3744 }
3745#ifndef IN_RC
3746 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3747 {
3748 unsigned cPresent = pPage->cPresent;
3749 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3750 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3751 if (pPT->a[i].n.u1Present)
3752 {
3753 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3754 {
3755 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3756 pPT->a[i].u = 0;
3757
3758 /* Update the counter as we're removing references. */
3759 Assert(pPage->cPresent);
3760 Assert(pPool->cPresent);
3761 pPage->cPresent--;
3762 pPool->cPresent--;
3763 }
3764 if (!--cPresent)
3765 break;
3766 }
3767 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3768 break;
3769 }
3770#endif
3771 }
3772 if (!--cLeft)
3773 break;
3774 }
3775 }
3776
3777 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3778 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3779
3780 /*
3781 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3782 */
3783 if (pPool->cPresent > 1024)
3784 {
3785 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3786 return VINF_PGM_GCPHYS_ALIASED;
3787 }
3788
3789 return VINF_SUCCESS;
3790}
3791
3792
3793/**
3794 * Clears the user entry in a user table.
3795 *
3796 * This is used to remove all references to a page when flushing it.
3797 */
3798static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3799{
3800 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3801 Assert(pUser->iUser < pPool->cCurPages);
3802 uint32_t iUserTable = pUser->iUserTable;
3803
3804 /*
3805 * Map the user page. Ignore references made by fictitious pages.
3806 */
3807 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3808 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3809 union
3810 {
3811 uint64_t *pau64;
3812 uint32_t *pau32;
3813 } u;
3814 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3815 {
3816 Assert(!pUserPage->pvPageR3);
3817 return;
3818 }
3819 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3820
3821
3822 /* Safety precaution in case we change the paging for other modes too in the future. */
3823 Assert(!pgmPoolIsPageLocked(pPage));
3824
3825#ifdef VBOX_STRICT
3826 /*
3827 * Some sanity checks.
3828 */
3829 switch (pUserPage->enmKind)
3830 {
3831 case PGMPOOLKIND_32BIT_PD:
3832 case PGMPOOLKIND_32BIT_PD_PHYS:
3833 Assert(iUserTable < X86_PG_ENTRIES);
3834 break;
3835 case PGMPOOLKIND_PAE_PDPT:
3836 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3837 case PGMPOOLKIND_PAE_PDPT_PHYS:
3838 Assert(iUserTable < 4);
3839 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3840 break;
3841 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3842 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3843 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3844 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3845 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3846 case PGMPOOLKIND_PAE_PD_PHYS:
3847 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3848 break;
3849 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3850 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3851 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3852 break;
3853 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3854 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3855 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3856 break;
3857 case PGMPOOLKIND_64BIT_PML4:
3858 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3859 /* GCPhys >> PAGE_SHIFT is the index here */
3860 break;
3861 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3862 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3863 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3864 break;
3865
3866 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3867 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3868 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3869 break;
3870
3871 case PGMPOOLKIND_ROOT_NESTED:
3872 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3873 break;
3874
3875 default:
3876 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3877 break;
3878 }
3879#endif /* VBOX_STRICT */
3880
3881 /*
3882 * Clear the entry in the user page.
3883 */
3884 switch (pUserPage->enmKind)
3885 {
3886 /* 32-bit entries */
3887 case PGMPOOLKIND_32BIT_PD:
3888 case PGMPOOLKIND_32BIT_PD_PHYS:
3889 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3890 break;
3891
3892 /* 64-bit entries */
3893 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3894 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3895 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3896 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3897 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3898#ifdef IN_RC
3899 /*
3900         * In 32-bit PAE mode we *must* invalidate the TLB when changing a
3901 * PDPT entry; the CPU fetches them only during cr3 load, so any
3902 * non-present PDPT will continue to cause page faults.
3903 */
3904 ASMReloadCR3();
3905 /* no break */
3906#endif
3907 case PGMPOOLKIND_PAE_PD_PHYS:
3908 case PGMPOOLKIND_PAE_PDPT_PHYS:
3909 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3910 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3911 case PGMPOOLKIND_64BIT_PML4:
3912 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3913 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3914 case PGMPOOLKIND_PAE_PDPT:
3915 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3916 case PGMPOOLKIND_ROOT_NESTED:
3917 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3918 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3919 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3920 break;
3921
3922 default:
3923 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3924 }
3925 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3926}
3927
3928
3929/**
3930 * Clears all users of a page.
3931 */
3932static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3933{
3934 /*
3935 * Free all the user records.
3936 */
3937 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3938
3939 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3940 uint16_t i = pPage->iUserHead;
3941 while (i != NIL_PGMPOOL_USER_INDEX)
3942 {
3943        /* Clear entry in user table. */
3944 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3945
3946 /* Free it. */
3947 const uint16_t iNext = paUsers[i].iNext;
3948 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3949 paUsers[i].iNext = pPool->iUserFreeHead;
3950 pPool->iUserFreeHead = i;
3951
3952 /* Next. */
3953 i = iNext;
3954 }
3955 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3956}
3957
3958
3959/**
3960 * Allocates a new physical cross reference extent.
3961 *
3962 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3963 * @param pVM Pointer to the VM.
3964 * @param piPhysExt Where to store the phys ext index.
3965 */
3966PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3967{
3968 PGM_LOCK_ASSERT_OWNER(pVM);
3969 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3970 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3971 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3972 {
3973 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3974 return NULL;
3975 }
3976 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3977 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3978 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3979 *piPhysExt = iPhysExt;
3980 return pPhysExt;
3981}
3982
3983
3984/**
3985 * Frees a physical cross reference extent.
3986 *
3987 * @param pVM Pointer to the VM.
3988 * @param iPhysExt The extent to free.
3989 */
3990void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3991{
3992 PGM_LOCK_ASSERT_OWNER(pVM);
3993 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3994 Assert(iPhysExt < pPool->cMaxPhysExts);
3995 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3996 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3997 {
3998 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3999 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4000 }
4001 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4002 pPool->iPhysExtFreeHead = iPhysExt;
4003}
4004
4005
4006/**
4007 * Frees a list of physical cross reference extents.
4008 *
4009 * @param pVM Pointer to the VM.
4010 * @param iPhysExt The index of the first extent in the list to free.
4011 */
4012void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
4013{
4014 PGM_LOCK_ASSERT_OWNER(pVM);
4015 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4016
4017 const uint16_t iPhysExtStart = iPhysExt;
4018 PPGMPOOLPHYSEXT pPhysExt;
4019 do
4020 {
4021 Assert(iPhysExt < pPool->cMaxPhysExts);
4022 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4023 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4024 {
4025 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4026 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4027 }
4028
4029 /* next */
4030 iPhysExt = pPhysExt->iNext;
4031 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4032
4033 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4034 pPool->iPhysExtFreeHead = iPhysExtStart;
4035}
4036
4037
4038/**
4039 * Insert a reference into a list of physical cross reference extents.
4040 *
4041 * @returns The new tracking data for PGMPAGE.
4042 *
4043 * @param pVM Pointer to the VM.
4044 * @param iPhysExt The physical extent index of the list head.
4045 * @param iShwPT The shadow page table index.
4046 * @param iPte Page table entry
4047 *
4048 */
4049static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4050{
4051 PGM_LOCK_ASSERT_OWNER(pVM);
4052 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4053 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4054
4055 /*
4056 * Special common cases.
4057 */
4058 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4059 {
4060 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4061 paPhysExts[iPhysExt].apte[1] = iPte;
4062 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4063 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4064 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4065 }
4066 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4067 {
4068 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4069 paPhysExts[iPhysExt].apte[2] = iPte;
4070 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4071 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4072 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4073 }
4074 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4075
4076 /*
4077 * General treatment.
4078 */
4079 const uint16_t iPhysExtStart = iPhysExt;
4080 unsigned cMax = 15;
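    /* Walk at most cMax extents looking for a free slot; longer chains are torn
       down and collapsed into the 'overflowed' tracking state to bound the cost. */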
4081 for (;;)
4082 {
4083 Assert(iPhysExt < pPool->cMaxPhysExts);
4084 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4085 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4086 {
4087 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4088 paPhysExts[iPhysExt].apte[i] = iPte;
4089 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4090 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4091 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4092 }
4093 if (!--cMax)
4094 {
4095 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4096 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4097 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4098 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4099 }
4100
4101 /* advance */
4102 iPhysExt = paPhysExts[iPhysExt].iNext;
4103 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4104 break;
4105 }
4106
4107 /*
4108 * Add another extent to the list.
4109 */
4110 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4111 if (!pNew)
4112 {
4113 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4114 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4115 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4116 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4117 }
4118 pNew->iNext = iPhysExtStart;
4119 pNew->aidx[0] = iShwPT;
4120 pNew->apte[0] = iPte;
4121 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4122 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4123}
4124
4125
4126/**
4127 * Add a reference to guest physical page where extents are in use.
4128 *
4129 * @returns The new tracking data for PGMPAGE.
4130 *
4131 * @param pVM Pointer to the VM.
4132 * @param pPhysPage Pointer to the aPages entry in the ram range.
4133 * @param u16 The ram range flags (top 16-bits).
4134 * @param iShwPT The shadow page table index.
4135 * @param iPte Page table entry
4136 */
4137uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4138{
4139 pgmLock(pVM);
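    /* Three cases: a page with a single tracked reference is converted to an extent
       list, a page already using extents gets the new reference inserted, and a page
       marked as overflowed only updates the statistics. */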
4140 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4141 {
4142 /*
4143 * Convert to extent list.
4144 */
4145 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4146 uint16_t iPhysExt;
4147 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4148 if (pPhysExt)
4149 {
4150 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4151 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4152 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4153 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4154 pPhysExt->aidx[1] = iShwPT;
4155 pPhysExt->apte[1] = iPte;
4156 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4157 }
4158 else
4159 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4160 }
4161 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4162 {
4163 /*
4164 * Insert into the extent list.
4165 */
4166 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4167 }
4168 else
4169 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4170 pgmUnlock(pVM);
4171 return u16;
4172}
4173
4174
4175/**
4176 * Clear references to guest physical memory.
4177 *
4178 * @param pPool The pool.
4179 * @param pPage The page.
4180 * @param pPhysPage Pointer to the aPages entry in the ram range.
4181 * @param iPte Shadow PTE index
4182 */
4183void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4184{
4185 PVM pVM = pPool->CTX_SUFF(pVM);
4186 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4187 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4188
4189 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4190 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4191 {
4192 pgmLock(pVM);
4193
4194 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4195 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4196 do
4197 {
4198 Assert(iPhysExt < pPool->cMaxPhysExts);
4199
4200 /*
4201 * Look for the shadow page and check if it's all freed.
4202 */
4203 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4204 {
4205 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4206 && paPhysExts[iPhysExt].apte[i] == iPte)
4207 {
4208 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4209 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4210
4211 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4212 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4213 {
4214 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4215 pgmUnlock(pVM);
4216 return;
4217 }
4218
4219 /* we can free the node. */
4220 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4221 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4222 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4223 {
4224 /* lonely node */
4225 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4226 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4227 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4228 }
4229 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4230 {
4231 /* head */
4232 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4233 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4234 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4235 }
4236 else
4237 {
4238 /* in list */
4239 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4240 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4241 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4242 }
4243 iPhysExt = iPhysExtNext;
4244 pgmUnlock(pVM);
4245 return;
4246 }
4247 }
4248
4249 /* next */
4250 iPhysExtPrev = iPhysExt;
4251 iPhysExt = paPhysExts[iPhysExt].iNext;
4252 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4253
4254 pgmUnlock(pVM);
4255 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4256 }
4257 else /* nothing to do */
4258 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4259}
4260
4261/**
4262 * Clear references to guest physical memory.
4263 *
4264 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4265 * physical address is assumed to be correct, so the linear search can be
4266 * skipped and we can assert at an earlier point.
4267 *
4268 * @param pPool The pool.
4269 * @param pPage The page.
4270 * @param HCPhys The host physical address corresponding to the guest page.
4271 * @param GCPhys The guest physical address corresponding to HCPhys.
4272 * @param iPte Shadow PTE index
4273 */
4274static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4275{
4276 /*
4277 * Lookup the page and check if it checks out before derefing it.
4278 */
4279 PVM pVM = pPool->CTX_SUFF(pVM);
4280 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4281 if (pPhysPage)
4282 {
4283 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4284#ifdef LOG_ENABLED
4285 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4286 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4287#endif
4288 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4289 {
4290 Assert(pPage->cPresent);
4291 Assert(pPool->cPresent);
4292 pPage->cPresent--;
4293 pPool->cPresent--;
4294 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4295 return;
4296 }
4297
4298 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4299 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4300 }
4301 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4302}
4303
4304
4305/**
4306 * Clear references to guest physical memory.
4307 *
4308 * @param pPool The pool.
4309 * @param pPage The page.
4310 * @param HCPhys The host physical address corresponding to the guest page.
4311 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4312 * @param iPte Shadow pte index
4313 */
4314void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4315{
4316 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4317
4318 /*
4319 * Try the hint first.
4320 */
4321 RTHCPHYS HCPhysHinted;
4322 PVM pVM = pPool->CTX_SUFF(pVM);
4323 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4324 if (pPhysPage)
4325 {
4326 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4327 Assert(HCPhysHinted);
4328 if (HCPhysHinted == HCPhys)
4329 {
4330 Assert(pPage->cPresent);
4331 Assert(pPool->cPresent);
4332 pPage->cPresent--;
4333 pPool->cPresent--;
4334 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4335 return;
4336 }
4337 }
4338 else
4339 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4340
4341 /*
4342 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4343 */
4344 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4345 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4346 while (pRam)
4347 {
4348 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4349 while (iPage-- > 0)
4350 {
4351 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4352 {
4353 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4354 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4355 Assert(pPage->cPresent);
4356 Assert(pPool->cPresent);
4357 pPage->cPresent--;
4358 pPool->cPresent--;
4359 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4360 return;
4361 }
4362 }
4363 pRam = pRam->CTX_SUFF(pNext);
4364 }
4365
4366 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4367}
4368
4369
4370/**
4371 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4372 *
4373 * @param pPool The pool.
4374 * @param pPage The page.
4375 * @param pShwPT The shadow page table (mapping of the page).
4376 * @param pGstPT The guest page table.
4377 */
4378DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4379{
4380 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4381 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4382 {
4383 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4384 if (pShwPT->a[i].n.u1Present)
4385 {
4386 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4387 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4388 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4389 if (!pPage->cPresent)
4390 break;
4391 }
4392 }
4393}
4394
4395
4396/**
4397 * Clear references to guest physical memory in a PAE / 32-bit page table.
4398 *
4399 * @param pPool The pool.
4400 * @param pPage The page.
4401 * @param pShwPT The shadow page table (mapping of the page).
4402 * @param pGstPT The guest page table (just a half one).
4403 */
4404DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4405{
4406 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4407 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4408 {
4409 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4410 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4411 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4412 {
4413 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4414 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4415 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4416 if (!pPage->cPresent)
4417 break;
4418 }
4419 }
4420}
4421
4422
4423/**
4424 * Clear references to guest physical memory in a PAE / PAE page table.
4425 *
4426 * @param pPool The pool.
4427 * @param pPage The page.
4428 * @param pShwPT The shadow page table (mapping of the page).
4429 * @param pGstPT The guest page table.
4430 */
4431DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4432{
4433 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4434 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4435 {
4436 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4437 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4438 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4439 {
4440            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4441 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4442 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4443 if (!pPage->cPresent)
4444 break;
4445 }
4446 }
4447}
4448
4449
4450/**
4451 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4452 *
4453 * @param pPool The pool.
4454 * @param pPage The page.
4455 * @param pShwPT The shadow page table (mapping of the page).
4456 */
4457DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4458{
4459 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4460 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4461 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4462 {
4463 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4464 if (pShwPT->a[i].n.u1Present)
4465 {
4466 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4467 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4468 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4469 if (!pPage->cPresent)
4470 break;
4471 }
4472 }
4473}
4474
4475
4476/**
4477 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4478 *
4479 * @param pPool The pool.
4480 * @param pPage The page.
4481 * @param pShwPT The shadow page table (mapping of the page).
4482 */
4483DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4484{
4485 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4486 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4487 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4488 {
4489 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4490 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4491 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4492 {
4493 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4494 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4495 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4496 if (!pPage->cPresent)
4497 break;
4498 }
4499 }
4500}
4501
4502
4503/**
4504 * Clear references to shadowed pages in an EPT page table.
4505 *
4506 * @param pPool The pool.
4507 * @param pPage The page.
4508 * @param pShwPT The shadow page table (mapping of the page).
4509 */
4510DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4511{
4512 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4513 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4514 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4515 {
4516 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4517 if (pShwPT->a[i].n.u1Present)
4518 {
4519 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4520 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4521 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4522 if (!pPage->cPresent)
4523 break;
4524 }
4525 }
4526}
4527
4528
4529/**
4530 * Clear references to shadowed pages in a 32-bit page directory.
4531 *
4532 * @param pPool The pool.
4533 * @param pPage The page.
4534 * @param pShwPD The shadow page directory (mapping of the page).
4535 */
4536DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4537{
4538 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4539 {
4540 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4541 if ( pShwPD->a[i].n.u1Present
4542 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4543 )
4544 {
4545 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4546 if (pSubPage)
4547 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4548 else
4549 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4550 }
4551 }
4552}
4553
4554
4555/**
4556 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4557 *
4558 * @param pPool The pool.
4559 * @param pPage The page.
4560 * @param pShwPD The shadow page directory (mapping of the page).
4561 */
4562DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4563{
4564 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4565 {
4566 if ( pShwPD->a[i].n.u1Present
4567 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4568 {
4569#ifdef PGM_WITH_LARGE_PAGES
4570 if (pShwPD->a[i].b.u1Size)
4571 {
4572 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4573 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4574 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4575 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4576 i);
4577 }
4578 else
4579#endif
4580 {
4581 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4582 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4583 if (pSubPage)
4584 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4585 else
4586 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4587 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4588 }
4589 }
4590 }
4591}
4592
4593
4594/**
4595 * Clear references to shadowed pages in a PAE page directory pointer table.
4596 *
4597 * @param pPool The pool.
4598 * @param pPage The page.
4599 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4600 */
4601DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4602{
4603 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4604 {
4605 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4606 if ( pShwPDPT->a[i].n.u1Present
4607 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4608 )
4609 {
4610 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4611 if (pSubPage)
4612 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4613 else
4614 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4615 }
4616 }
4617}
4618
4619
4620/**
4621 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4622 *
4623 * @param pPool The pool.
4624 * @param pPage The page.
4625 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4626 */
4627DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4628{
4629 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4630 {
4631 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4632 if (pShwPDPT->a[i].n.u1Present)
4633 {
4634 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4635 if (pSubPage)
4636 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4637 else
4638 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4639 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4640 }
4641 }
4642}
4643
4644
4645/**
4646 * Clear references to shadowed pages in a 64-bit level 4 page table.
4647 *
4648 * @param pPool The pool.
4649 * @param pPage The page.
4650 * @param pShwPML4 The shadow level 4 page table (mapping of the page).
4651 */
4652DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4653{
4654 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4655 {
4656 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4657 if (pShwPML4->a[i].n.u1Present)
4658 {
4659 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4660 if (pSubPage)
4661 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4662 else
4663 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4664 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4665 }
4666 }
4667}
4668
4669
4670/**
4671 * Clear references to shadowed pages in an EPT page directory.
4672 *
4673 * @param pPool The pool.
4674 * @param pPage The page.
4675 * @param pShwPD The shadow page directory (mapping of the page).
4676 */
4677DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4678{
4679 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4680 {
4681 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4682 if (pShwPD->a[i].n.u1Present)
4683 {
4684#ifdef PGM_WITH_LARGE_PAGES
4685 if (pShwPD->a[i].b.u1Size)
4686 {
4687 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4688 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4689 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4690 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4691 i);
4692 }
4693 else
4694#endif
4695 {
4696 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4697 if (pSubPage)
4698 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4699 else
4700 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4701 }
4702 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4703 }
4704 }
4705}
4706
4707
4708/**
4709 * Clear references to shadowed pages in an EPT page directory pointer table.
4710 *
4711 * @param pPool The pool.
4712 * @param pPage The page.
4713 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4714 */
4715DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4716{
4717 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4718 {
4719 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4720 if (pShwPDPT->a[i].n.u1Present)
4721 {
4722 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4723 if (pSubPage)
4724 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4725 else
4726 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4727 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4728 }
4729 }
4730}
4731
4732
4733/**
4734 * Clears all references made by this page.
4735 *
4736 * This includes other shadow pages and GC physical addresses.
4737 *
4738 * @param pPool The pool.
4739 * @param pPage The page.
4740 */
4741static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4742{
4743 /*
4744 * Map the shadow page and take action according to the page kind.
4745 */
4746 PVM pVM = pPool->CTX_SUFF(pVM);
4747 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4748 switch (pPage->enmKind)
4749 {
4750 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4751 {
4752 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4753 void *pvGst;
4754 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4755 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4756 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4757 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4758 break;
4759 }
4760
4761 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4762 {
4763 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4764 void *pvGst;
4765 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4766 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4767 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4768 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4769 break;
4770 }
4771
4772 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4773 {
4774 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4775 void *pvGst;
4776 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4777 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4778 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4779 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4780 break;
4781 }
4782
4783 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4784 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4785 {
4786 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4787 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4788 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4789 break;
4790 }
4791
4792 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4793 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4794 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4795 {
4796 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4797 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4798 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4799 break;
4800 }
4801
4802 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4803 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4804 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4805 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4806 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4807 case PGMPOOLKIND_PAE_PD_PHYS:
4808 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4809 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4810 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4811 break;
4812
4813 case PGMPOOLKIND_32BIT_PD_PHYS:
4814 case PGMPOOLKIND_32BIT_PD:
4815 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4816 break;
4817
4818 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4819 case PGMPOOLKIND_PAE_PDPT:
4820 case PGMPOOLKIND_PAE_PDPT_PHYS:
4821 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4822 break;
4823
4824 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4825 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4826 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4827 break;
4828
4829 case PGMPOOLKIND_64BIT_PML4:
4830 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4831 break;
4832
4833 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4834 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4835 break;
4836
4837 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4838 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4839 break;
4840
4841 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4842 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4843 break;
4844
4845 default:
4846 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4847 }
4848
 4849 /* Paranoia: clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4850 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4851 ASMMemZeroPage(pvShw);
4852 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4853 pPage->fZeroed = true;
4854 Assert(!pPage->cPresent);
4855 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4856}
4857
4858
4859/**
4860 * Flushes a pool page.
4861 *
4862 * This moves the page to the free list after removing all user references to it.
4863 *
4864 * @returns VBox status code.
4865 * @retval VINF_SUCCESS on success.
4866 * @param pPool The pool.
 4867 * @param pPage The shadow page to flush.
 4868 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!).
4869 */
4870int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4871{
4872 PVM pVM = pPool->CTX_SUFF(pVM);
4873 bool fFlushRequired = false;
4874
4875 int rc = VINF_SUCCESS;
4876 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4877 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4878 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4879
4880 /*
4881 * Reject any attempts at flushing any of the special root pages (shall
4882 * not happen).
4883 */
4884 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4885 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4886 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4887 VINF_SUCCESS);
4888
4889 pgmLock(pVM);
4890
4891 /*
4892 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4893 */
4894 if (pgmPoolIsPageLocked(pPage))
4895 {
4896 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4897 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4898 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4899 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4900 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4901 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4902 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4903 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4904 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4905 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4906 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4907 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4908 pgmUnlock(pVM);
4909 return VINF_SUCCESS;
4910 }
4911
4912#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4913 /* Start a subset so we won't run out of mapping space. */
4914 PVMCPU pVCpu = VMMGetCpu(pVM);
4915 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4916#endif
4917
4918 /*
4919 * Mark the page as being in need of an ASMMemZeroPage().
4920 */
4921 pPage->fZeroed = false;
4922
4923#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4924 if (pPage->fDirty)
4925 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4926#endif
4927
4928 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4929 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4930 fFlushRequired = true;
4931
4932 /*
4933 * Clear the page.
4934 */
4935 pgmPoolTrackClearPageUsers(pPool, pPage);
4936 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4937 pgmPoolTrackDeref(pPool, pPage);
4938 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4939
4940 /*
4941 * Flush it from the cache.
4942 */
4943 pgmPoolCacheFlushPage(pPool, pPage);
4944
4945#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4946 /* Heavy stuff done. */
4947 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4948#endif
4949
4950 /*
 4951 * Deregister the monitoring.
4952 */
4953 if (pPage->fMonitored)
4954 rc = pgmPoolMonitorFlush(pPool, pPage);
4955
4956 /*
4957 * Free the page.
4958 */
4959 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
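/* Push the page onto the head of the free list (LIFO) and reset its identity below. */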
4960 pPage->iNext = pPool->iFreeHead;
4961 pPool->iFreeHead = pPage->idx;
4962 pPage->enmKind = PGMPOOLKIND_FREE;
4963 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4964 pPage->GCPhys = NIL_RTGCPHYS;
4965 pPage->fReusedFlushPending = false;
4966
4967 pPool->cUsedPages--;
4968
4969 /* Flush the TLBs of all VCPUs if required. */
4970 if ( fFlushRequired
4971 && fFlush)
4972 {
4973 PGM_INVL_ALL_VCPU_TLBS(pVM);
4974 }
4975
4976 pgmUnlock(pVM);
4977 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4978 return rc;
4979}
4980
4981
4982/**
4983 * Frees a usage of a pool page.
4984 *
 4985 * The caller is responsible for updating the user table so that it no longer
4986 * references the shadow page.
4987 *
4988 * @param pPool The pool.
 4989 * @param pPage The shadow page.
4990 * @param iUser The shadow page pool index of the user table.
4991 * NIL_PGMPOOL_IDX for root pages.
4992 * @param iUserTable The index into the user table (shadowed). Ignored if
4993 * root page.
4994 */
4995void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4996{
4997 PVM pVM = pPool->CTX_SUFF(pVM);
4998
4999 STAM_PROFILE_START(&pPool->StatFree, a);
5000 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5001 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5002 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5003
5004 pgmLock(pVM);
5005 if (iUser != NIL_PGMPOOL_IDX)
5006 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5007 if (!pPage->fCached)
5008 pgmPoolFlushPage(pPool, pPage);
5009 pgmUnlock(pVM);
5010 STAM_PROFILE_STOP(&pPool->StatFree, a);
5011}
5012
5013
5014/**
 5015 * Makes one or more free pages available.
5016 *
5017 * @returns VBox status code.
5018 * @retval VINF_SUCCESS on success.
5019 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5020 *
5021 * @param pPool The pool.
 5022 * @param enmKind The page table kind.
5023 * @param iUser The user of the page.
5024 */
5025static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5026{
5027 PVM pVM = pPool->CTX_SUFF(pVM);
5028 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5029 NOREF(enmKind);
5030
5031 /*
 5032 * If the pool isn't fully grown yet, expand it.
5033 */
5034 if ( pPool->cCurPages < pPool->cMaxPages
5035#if defined(IN_RC)
5036 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
5037 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5038 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
5039#endif
5040 )
5041 {
5042 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5043#ifdef IN_RING3
5044 int rc = PGMR3PoolGrow(pVM);
5045#else
5046 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
5047#endif
5048 if (RT_FAILURE(rc))
5049 return rc;
5050 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5051 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5052 return VINF_SUCCESS;
5053 }
5054
5055 /*
5056 * Free one cached page.
5057 */
5058 return pgmPoolCacheFreeOne(pPool, iUser);
5059}
5060
5061
5062/**
5063 * Allocates a page from the pool.
5064 *
5065 * This page may actually be a cached page and not in need of any processing
 5066 * on the caller's part.
5067 *
5068 * @returns VBox status code.
5069 * @retval VINF_SUCCESS if a NEW page was allocated.
5070 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5071 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5072 *
5073 * @param pVM Pointer to the VM.
 5074 * @param GCPhys The GC physical address of the page we're going to shadow.
5075 * For 4MB and 2MB PD entries, it's the first address the
5076 * shadow PT is covering.
5077 * @param enmKind The kind of mapping.
5078 * @param enmAccess Access type for the mapping (only relevant for big pages)
5079 * @param fA20Enabled Whether the A20 gate is enabled or not.
5080 * @param iUser The shadow page pool index of the user table. Root
5081 * pages should pass NIL_PGMPOOL_IDX.
5082 * @param iUserTable The index into the user table (shadowed). Ignored for
5083 * root pages (iUser == NIL_PGMPOOL_IDX).
5084 * @param fLockPage Lock the page
5085 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5086 */
5087int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5088 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5089{
5090 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5091 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5092 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5093 *ppPage = NULL;
5094 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5095 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5096 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5097
5098 pgmLock(pVM);
5099
5100 if (pPool->fCacheEnabled)
5101 {
5102 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5103 if (RT_SUCCESS(rc2))
5104 {
5105 if (fLockPage)
5106 pgmPoolLockPage(pPool, *ppPage);
5107 pgmUnlock(pVM);
5108 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5109 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5110 return rc2;
5111 }
5112 }
5113
5114 /*
5115 * Allocate a new one.
5116 */
5117 int rc = VINF_SUCCESS;
5118 uint16_t iNew = pPool->iFreeHead;
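/* No free page at hand? Grow the pool or evict a cached page first. */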
5119 if (iNew == NIL_PGMPOOL_IDX)
5120 {
5121 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5122 if (RT_FAILURE(rc))
5123 {
5124 pgmUnlock(pVM);
5125 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5126 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5127 return rc;
5128 }
5129 iNew = pPool->iFreeHead;
5130 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5131 }
5132
5133 /* unlink the free head */
5134 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5135 pPool->iFreeHead = pPage->iNext;
5136 pPage->iNext = NIL_PGMPOOL_IDX;
5137
5138 /*
5139 * Initialize it.
5140 */
5141 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5142 pPage->enmKind = enmKind;
5143 pPage->enmAccess = enmAccess;
5144 pPage->GCPhys = GCPhys;
5145 pPage->fA20Enabled = fA20Enabled;
5146 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5147 pPage->fMonitored = false;
5148 pPage->fCached = false;
5149 pPage->fDirty = false;
5150 pPage->fReusedFlushPending = false;
5151 pPage->cModifications = 0;
5152 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5153 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5154 pPage->cPresent = 0;
5155 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5156 pPage->idxDirtyEntry = 0;
5157 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5158 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5159 pPage->cLastAccessHandler = 0;
5160 pPage->cLocked = 0;
5161# ifdef VBOX_STRICT
5162 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5163# endif
5164
5165 /*
5166 * Insert into the tracking and cache. If this fails, free the page.
5167 */
5168 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5169 if (RT_FAILURE(rc3))
5170 {
5171 pPool->cUsedPages--;
5172 pPage->enmKind = PGMPOOLKIND_FREE;
5173 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5174 pPage->GCPhys = NIL_RTGCPHYS;
5175 pPage->iNext = pPool->iFreeHead;
5176 pPool->iFreeHead = pPage->idx;
5177 pgmUnlock(pVM);
5178 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5179 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5180 return rc3;
5181 }
5182
5183 /*
5184 * Commit the allocation, clear the page and return.
5185 */
5186#ifdef VBOX_WITH_STATISTICS
5187 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5188 pPool->cUsedPagesHigh = pPool->cUsedPages;
5189#endif
5190
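/* Skip the memset when the page is already known to be all zeros (fZeroed). */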
5191 if (!pPage->fZeroed)
5192 {
5193 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5194 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5195 ASMMemZeroPage(pv);
5196 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5197 }
5198
5199 *ppPage = pPage;
5200 if (fLockPage)
5201 pgmPoolLockPage(pPool, pPage);
5202 pgmUnlock(pVM);
5203 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5204 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5205 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5206 return rc;
5207}
5208
5209
5210/**
5211 * Frees a usage of a pool page.
5212 *
5213 * @param pVM Pointer to the VM.
5214 * @param HCPhys The HC physical address of the shadow page.
5215 * @param iUser The shadow page pool index of the user table.
5216 * NIL_PGMPOOL_IDX if root page.
5217 * @param iUserTable The index into the user table (shadowed). Ignored if
5218 * root page.
5219 */
5220void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5221{
5222 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5223 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5224 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5225}
5226
5227
5228/**
 5229 * Internal worker for finding an 'in-use' shadow page given its physical address.
5230 *
5231 * @returns Pointer to the shadow page structure.
5232 * @param pPool The pool.
5233 * @param HCPhys The HC physical address of the shadow page.
5234 */
5235PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5236{
5237 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5238
5239 /*
5240 * Look up the page.
5241 */
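/* The caller may pass a raw PDE/CR3 style value, so strip any attribute bits before the AVL lookup. */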
5242 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5243
5244 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5245 return pPage;
5246}
5247
5248
5249/**
5250 * Internal worker for finding a page for debugging purposes, no assertions.
5251 *
 5252 * @returns Pointer to the shadow page structure, or NULL if not found.
5253 * @param pPool The pool.
5254 * @param HCPhys The HC physical address of the shadow page.
5255 */
5256PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5257{
5258 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5259 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5260}
5261
5262#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5263
5264/**
 5265 * Flushes the specified page if present.
 5266 *
 5267 * @param pVM Pointer to the VM.
 5268 * @param GCPhys Guest physical address of the page to flush.
5269 */
5270void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5271{
5272 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5273
5274 VM_ASSERT_EMT(pVM);
5275
5276 /*
5277 * Look up the GCPhys in the hash.
5278 */
5279 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5280 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5281 if (i == NIL_PGMPOOL_IDX)
5282 return;
5283
5284 do
5285 {
5286 PPGMPOOLPAGE pPage = &pPool->aPages[i];
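/* Unsigned compare: matches exactly when pPage->GCPhys lies within the 4 KB page at GCPhys. */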
5287 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5288 {
5289 switch (pPage->enmKind)
5290 {
5291 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5292 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5293 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5294 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5295 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5296 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5297 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5298 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5299 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5300 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5301 case PGMPOOLKIND_64BIT_PML4:
5302 case PGMPOOLKIND_32BIT_PD:
5303 case PGMPOOLKIND_PAE_PDPT:
5304 {
5305 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5306#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5307 if (pPage->fDirty)
5308 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5309 else
5310#endif
5311 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5312 Assert(!pgmPoolIsPageLocked(pPage));
5313 pgmPoolMonitorChainFlush(pPool, pPage);
5314 return;
5315 }
5316
5317 /* ignore, no monitoring. */
5318 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5319 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5320 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5321 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5322 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5323 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5324 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5325 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5326 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5327 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5328 case PGMPOOLKIND_ROOT_NESTED:
5329 case PGMPOOLKIND_PAE_PD_PHYS:
5330 case PGMPOOLKIND_PAE_PDPT_PHYS:
5331 case PGMPOOLKIND_32BIT_PD_PHYS:
5332 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5333 break;
5334
5335 default:
5336 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5337 }
5338 }
5339
5340 /* next */
5341 i = pPage->iNext;
5342 } while (i != NIL_PGMPOOL_IDX);
5343 return;
5344}
5345
5346#endif /* IN_RING3 */
5347#ifdef IN_RING3
5348
5349/**
5350 * Reset CPU on hot plugging.
5351 *
5352 * @param pVM Pointer to the VM.
5353 * @param pVCpu The virtual CPU.
5354 */
5355void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5356{
5357 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5358
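/* Re-enter the shadow paging mode and force a full CR3 resync and TLB flush on this VCPU. */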
5359 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5360 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5361 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5362}
5363
5364
5365/**
5366 * Flushes the entire cache.
5367 *
 5368 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
 5369 * this and will execute the CR3 flush.
 5370 *
 5371 * @param pVM Pointer to the VM.
5372 */
5373void pgmR3PoolReset(PVM pVM)
5374{
5375 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5376
5377 PGM_LOCK_ASSERT_OWNER(pVM);
5378 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5379 LogFlow(("pgmR3PoolReset:\n"));
5380
5381 /*
5382 * If there are no pages in the pool, there is nothing to do.
5383 */
5384 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5385 {
5386 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5387 return;
5388 }
5389
5390 /*
5391 * Exit the shadow mode since we're going to clear everything,
5392 * including the root page.
5393 */
5394 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5395 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5396
5397 /*
5398 * Nuke the free list and reinsert all pages into it.
5399 */
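/* Each page links to the next higher index; the chain is terminated and the free head reset after the loop. */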
5400 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5401 {
5402 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5403
5404 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5405 if (pPage->fMonitored)
5406 pgmPoolMonitorFlush(pPool, pPage);
5407 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5408 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5409 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5410 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5411 pPage->cModifications = 0;
5412 pPage->GCPhys = NIL_RTGCPHYS;
5413 pPage->enmKind = PGMPOOLKIND_FREE;
5414 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5415 Assert(pPage->idx == i);
5416 pPage->iNext = i + 1;
5417 pPage->fA20Enabled = true;
5418 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5419 pPage->fSeenNonGlobal = false;
5420 pPage->fMonitored = false;
5421 pPage->fDirty = false;
5422 pPage->fCached = false;
5423 pPage->fReusedFlushPending = false;
5424 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5425 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5426 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5427 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5428 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5429 pPage->cLastAccessHandler = 0;
5430 pPage->cLocked = 0;
5431#ifdef VBOX_STRICT
5432 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5433#endif
5434 }
5435 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5436 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5437 pPool->cUsedPages = 0;
5438
5439 /*
5440 * Zap and reinitialize the user records.
5441 */
5442 pPool->cPresent = 0;
5443 pPool->iUserFreeHead = 0;
5444 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5445 const unsigned cMaxUsers = pPool->cMaxUsers;
5446 for (unsigned i = 0; i < cMaxUsers; i++)
5447 {
5448 paUsers[i].iNext = i + 1;
5449 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5450 paUsers[i].iUserTable = 0xfffffffe;
5451 }
5452 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5453
5454 /*
5455 * Clear all the GCPhys links and rebuild the phys ext free list.
5456 */
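/* Zero the tracking data of every RAM page so that no guest page refers to a shadow pool entry any more. */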
5457 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5458 pRam;
5459 pRam = pRam->CTX_SUFF(pNext))
5460 {
5461 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5462 while (iPage-- > 0)
5463 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5464 }
5465
5466 pPool->iPhysExtFreeHead = 0;
5467 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5468 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5469 for (unsigned i = 0; i < cMaxPhysExts; i++)
5470 {
5471 paPhysExts[i].iNext = i + 1;
5472 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5473 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5474 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5475 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5476 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5477 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5478 }
5479 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5480
5481 /*
5482 * Just zap the modified list.
5483 */
5484 pPool->cModifiedPages = 0;
5485 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5486
5487 /*
5488 * Clear the GCPhys hash and the age list.
5489 */
5490 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5491 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5492 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5493 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5494
5495#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5496 /* Clear all dirty pages. */
5497 pPool->idxFreeDirtyPage = 0;
5498 pPool->cDirtyPages = 0;
5499 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5500 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5501#endif
5502
5503 /*
5504 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5505 */
5506 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5507 {
5508 /*
5509 * Re-enter the shadowing mode and assert Sync CR3 FF.
5510 */
5511 PVMCPU pVCpu = &pVM->aCpus[i];
5512 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5513 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5514 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5515 }
5516
5517 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5518}
5519
5520#endif /* IN_RING3 */
5521
5522#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5523/**
5524 * Stringifies a PGMPOOLKIND value.
5525 */
5526static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5527{
5528 switch ((PGMPOOLKIND)enmKind)
5529 {
5530 case PGMPOOLKIND_INVALID:
5531 return "PGMPOOLKIND_INVALID";
5532 case PGMPOOLKIND_FREE:
5533 return "PGMPOOLKIND_FREE";
5534 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5535 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5536 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5537 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5538 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5539 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5540 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5541 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5542 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5543 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5544 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5545 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5546 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5547 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5548 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5549 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5550 case PGMPOOLKIND_32BIT_PD:
5551 return "PGMPOOLKIND_32BIT_PD";
5552 case PGMPOOLKIND_32BIT_PD_PHYS:
5553 return "PGMPOOLKIND_32BIT_PD_PHYS";
5554 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5555 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5556 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5557 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5558 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5559 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5560 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5561 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5562 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5563 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5564 case PGMPOOLKIND_PAE_PD_PHYS:
5565 return "PGMPOOLKIND_PAE_PD_PHYS";
5566 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5567 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5568 case PGMPOOLKIND_PAE_PDPT:
5569 return "PGMPOOLKIND_PAE_PDPT";
5570 case PGMPOOLKIND_PAE_PDPT_PHYS:
5571 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5572 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5573 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5574 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5575 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5576 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5577 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5578 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5579 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5580 case PGMPOOLKIND_64BIT_PML4:
5581 return "PGMPOOLKIND_64BIT_PML4";
5582 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5583 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5584 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5585 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5586 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5587 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5588 case PGMPOOLKIND_ROOT_NESTED:
5589 return "PGMPOOLKIND_ROOT_NESTED";
5590 }
5591 return "Unknown kind!";
5592}
5593#endif /* LOG_ENABLED || VBOX_STRICT */
5594