VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 61142

Last change on this file since 61142 was 61142, checked in by vboxsync, 9 years ago

pgmPoolAccessPfHandlerSimple: Deal with larger writes (e.g. AVX regs).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 213.7 KB
Line 
1/* $Id: PGMAllPool.cpp 61142 2016-05-23 22:01:44Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hm_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*********************************************************************************************************************************
44* Internal Functions *
45*********************************************************************************************************************************/
46RT_C_DECLS_BEGIN
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
54static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
55#endif
56#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
57static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
58#endif
59
60int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
61PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
62void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
63void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
64
65RT_C_DECLS_END
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
88/**
89 * Flushes a chain of pages sharing the same access monitor.
90 *
91 * @returns VBox status code suitable for scheduling.
92 * @param pPool The pool.
93 * @param pPage A page in the chain.
94 * @todo VBOXSTRICTRC
95 */
96int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
97{
98 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
99
100 /*
101 * Find the list head.
102 */
103 uint16_t idx = pPage->idx;
104 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
105 {
106 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 idx = pPage->iMonitoredPrev;
109 Assert(idx != pPage->idx);
110 pPage = &pPool->aPages[idx];
111 }
112 }
113
114 /*
115 * Iterate the list flushing each shadow page.
116 */
117 int rc = VINF_SUCCESS;
118 for (;;)
119 {
120 idx = pPage->iMonitoredNext;
121 Assert(idx != pPage->idx);
122 if (pPage->idx >= PGMPOOL_IDX_FIRST)
123 {
124 int rc2 = pgmPoolFlushPage(pPool, pPage);
125 AssertRC(rc2);
126 }
127 /* next */
128 if (idx == NIL_PGMPOOL_IDX)
129 break;
130 pPage = &pPool->aPages[idx];
131 }
132 return rc;
133}
134
135
136/**
137 * Wrapper for getting the current context pointer to the entry being modified.
138 *
139 * @returns VBox status code suitable for scheduling.
140 * @param pVM The cross context VM structure.
141 * @param pvDst Destination address
142 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
143 * on the context (e.g. \#PF in R0 & RC).
144 * @param GCPhysSrc The source guest physical address.
145 * @param cb Size of data to read
146 */
147DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
148{
149#if defined(IN_RING3)
150 NOREF(pVM); NOREF(GCPhysSrc);
151 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
152 return VINF_SUCCESS;
153#else
154 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
155 NOREF(pvSrc);
156 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
157#endif
158}
159
160
161/**
162 * Process shadow entries before they are changed by the guest.
163 *
164 * For PT entries we will clear them. For PD entries, we'll simply check
165 * for mapping conflicts and set the SyncCR3 FF if found.
166 *
167 * @param pVCpu The cross context virtual CPU structure.
168 * @param pPool The pool.
169 * @param pPage The head page.
170 * @param GCPhysFault The guest physical fault address.
171 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
172 * depending on the context (e.g. \#PF in R0 & RC).
173 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
174 */
175static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
176 void const *pvAddress, unsigned cbWrite)
177{
178 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
179 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
180 PVM pVM = pPool->CTX_SUFF(pVM);
181 NOREF(pVCpu);
182
183 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
184
185 for (;;)
186 {
187 union
188 {
189 void *pv;
190 PX86PT pPT;
191 PPGMSHWPTPAE pPTPae;
192 PX86PD pPD;
193 PX86PDPAE pPDPae;
194 PX86PDPT pPDPT;
195 PX86PML4 pPML4;
196 } uShw;
197
198 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
199 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
200
201 uShw.pv = NULL;
202 switch (pPage->enmKind)
203 {
204 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
205 {
206 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
207 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
208 const unsigned iShw = off / sizeof(X86PTE);
209 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
210 if (uShw.pPT->a[iShw].n.u1Present)
211 {
212 X86PTE GstPte;
213
214 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
215 AssertRC(rc);
216 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
217 pgmPoolTracDerefGCPhysHint(pPool, pPage,
218 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
219 GstPte.u & X86_PTE_PG_MASK,
220 iShw);
221 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
222 }
223 break;
224 }
225
226 /* page/2 sized */
227 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
228 {
229 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
230 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
231 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
232 {
233 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
234 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
235 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
236 {
237 X86PTE GstPte;
238 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
239 AssertRC(rc);
240
241 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
242 pgmPoolTracDerefGCPhysHint(pPool, pPage,
243 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
244 GstPte.u & X86_PTE_PG_MASK,
245 iShw);
246 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
247 }
248 }
249 break;
250 }
251
252 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
253 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
254 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
255 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
256 {
257 unsigned iGst = off / sizeof(X86PDE);
258 unsigned iShwPdpt = iGst / 256;
259 unsigned iShw = (iGst % 256) * 2;
260 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
261
262 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
263 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
264 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
265 {
266 for (unsigned i = 0; i < 2; i++)
267 {
268# ifdef VBOX_WITH_RAW_MODE_NOT_R0
269 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
270 {
271 Assert(pgmMapAreMappingsEnabled(pVM));
272 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
273 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
274 break;
275 }
276# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
277 if (uShw.pPDPae->a[iShw+i].n.u1Present)
278 {
279 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
280 pgmPoolFree(pVM,
281 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
282 pPage->idx,
283 iShw + i);
284 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
285 }
286
287 /* paranoia / a bit assumptive. */
288 if ( (off & 3)
289 && (off & 3) + cbWrite > 4)
290 {
291 const unsigned iShw2 = iShw + 2 + i;
292 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
293 {
294# ifdef VBOX_WITH_RAW_MODE_NOT_R0
295 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
296 {
297 Assert(pgmMapAreMappingsEnabled(pVM));
298 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
299 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
300 break;
301 }
302# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
303 if (uShw.pPDPae->a[iShw2].n.u1Present)
304 {
305 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
306 pgmPoolFree(pVM,
307 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
308 pPage->idx,
309 iShw2);
310 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
311 }
312 }
313 }
314 }
315 }
316 break;
317 }
318
319 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
320 {
321 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
322 const unsigned iShw = off / sizeof(X86PTEPAE);
323 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
324 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
325 {
326 X86PTEPAE GstPte;
327 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
328 AssertRC(rc);
329
330 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
331 pgmPoolTracDerefGCPhysHint(pPool, pPage,
332 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
333 GstPte.u & X86_PTE_PAE_PG_MASK,
334 iShw);
335 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
336 }
337
338 /* paranoia / a bit assumptive. */
339 if ( (off & 7)
340 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
341 {
342 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
343 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
344
345 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
346 {
347 X86PTEPAE GstPte;
348 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
349 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
350 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
351 AssertRC(rc);
352 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
353 pgmPoolTracDerefGCPhysHint(pPool, pPage,
354 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
355 GstPte.u & X86_PTE_PAE_PG_MASK,
356 iShw2);
357 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
358 }
359 }
360 break;
361 }
362
363 case PGMPOOLKIND_32BIT_PD:
364 {
365 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
366 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
367
368 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
369 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
370# ifdef VBOX_WITH_RAW_MODE_NOT_R0
371 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
372 {
373 Assert(pgmMapAreMappingsEnabled(pVM));
374 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
375 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
376 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
377 break;
378 }
379 else
380# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
381 {
382 if (uShw.pPD->a[iShw].n.u1Present)
383 {
384 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
385 pgmPoolFree(pVM,
386 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
387 pPage->idx,
388 iShw);
389 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
390 }
391 }
392 /* paranoia / a bit assumptive. */
393 if ( (off & 3)
394 && (off & 3) + cbWrite > sizeof(X86PTE))
395 {
396 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
397 if ( iShw2 != iShw
398 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
399 {
400# ifdef VBOX_WITH_RAW_MODE_NOT_R0
401 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
402 {
403 Assert(pgmMapAreMappingsEnabled(pVM));
404 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
405 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
406 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
407 break;
408 }
409# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
410 if (uShw.pPD->a[iShw2].n.u1Present)
411 {
412 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
413 pgmPoolFree(pVM,
414 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
415 pPage->idx,
416 iShw2);
417 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
418 }
419 }
420 }
421#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
422 if ( uShw.pPD->a[iShw].n.u1Present
423 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
424 {
425 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
426# ifdef IN_RC /* TLB load - we're pushing things a bit... */
427 ASMProbeReadByte(pvAddress);
428# endif
429 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
430 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
431 }
432#endif
433 break;
434 }
435
436 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
437 {
438 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
439 const unsigned iShw = off / sizeof(X86PDEPAE);
440 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
441#ifdef VBOX_WITH_RAW_MODE_NOT_R0
442 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
443 {
444 Assert(pgmMapAreMappingsEnabled(pVM));
445 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
446 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
447 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
448 break;
449 }
450#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
451 /*
452 * Causes trouble when the guest uses a PDE to refer to the whole page table level
453 * structure. (Invalidate here; faults later on when it tries to change the page
454 * table entries -> recheck; probably only applies to the RC case.)
455 */
456#ifdef VBOX_WITH_RAW_MODE_NOT_R0
457 else
458#endif
459 {
460 if (uShw.pPDPae->a[iShw].n.u1Present)
461 {
462 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
463 pgmPoolFree(pVM,
464 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
465 pPage->idx,
466 iShw);
467 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
468 }
469 }
470 /* paranoia / a bit assumptive. */
471 if ( (off & 7)
472 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
473 {
474 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
475 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
476
477#ifdef VBOX_WITH_RAW_MODE_NOT_R0
478 if ( iShw2 != iShw
479 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
480 {
481 Assert(pgmMapAreMappingsEnabled(pVM));
482 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
483 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
484 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
485 break;
486 }
487 else
488#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
489 if (uShw.pPDPae->a[iShw2].n.u1Present)
490 {
491 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
492 pgmPoolFree(pVM,
493 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
494 pPage->idx,
495 iShw2);
496 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
497 }
498 }
499 break;
500 }
501
502 case PGMPOOLKIND_PAE_PDPT:
503 {
504 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
505 /*
506 * Hopefully this doesn't happen very often:
507 * - touching unused parts of the page
508 * - messing with the bits of pd pointers without changing the physical address
509 */
510 /* PDPT roots are not page aligned; 32 byte only! */
511 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
512
513 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
514 const unsigned iShw = offPdpt / sizeof(X86PDPE);
515 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
516 {
517# ifdef VBOX_WITH_RAW_MODE_NOT_R0
518 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
519 {
520 Assert(pgmMapAreMappingsEnabled(pVM));
521 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
522 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
523 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
524 break;
525 }
526 else
527# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
528 if (uShw.pPDPT->a[iShw].n.u1Present)
529 {
530 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
531 pgmPoolFree(pVM,
532 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
533 pPage->idx,
534 iShw);
535 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
536 }
537
538 /* paranoia / a bit assumptive. */
539 if ( (offPdpt & 7)
540 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
541 {
542 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
543 if ( iShw2 != iShw
544 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
545 {
546# ifdef VBOX_WITH_RAW_MODE_NOT_R0
547 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
548 {
549 Assert(pgmMapAreMappingsEnabled(pVM));
550 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
551 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
552 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
553 break;
554 }
555 else
556# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
557 if (uShw.pPDPT->a[iShw2].n.u1Present)
558 {
559 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
560 pgmPoolFree(pVM,
561 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
562 pPage->idx,
563 iShw2);
564 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
565 }
566 }
567 }
568 }
569 break;
570 }
571
572#ifndef IN_RC
573 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
574 {
575 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
576 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
577 const unsigned iShw = off / sizeof(X86PDEPAE);
578 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
579 if (uShw.pPDPae->a[iShw].n.u1Present)
580 {
581 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
582 pgmPoolFree(pVM,
583 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
584 pPage->idx,
585 iShw);
586 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
587 }
588 /* paranoia / a bit assumptive. */
589 if ( (off & 7)
590 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
591 {
592 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
593 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
594
595 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
596 if (uShw.pPDPae->a[iShw2].n.u1Present)
597 {
598 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
599 pgmPoolFree(pVM,
600 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
601 pPage->idx,
602 iShw2);
603 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
604 }
605 }
606 break;
607 }
608
609 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
610 {
611 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
612 /*
613 * Hopefully this doesn't happen very often:
614 * - messing with the bits of pd pointers without changing the physical address
615 */
616 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
617 const unsigned iShw = off / sizeof(X86PDPE);
618 if (uShw.pPDPT->a[iShw].n.u1Present)
619 {
620 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
621 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
622 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
623 }
624 /* paranoia / a bit assumptive. */
625 if ( (off & 7)
626 && (off & 7) + cbWrite > sizeof(X86PDPE))
627 {
628 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
629 if (uShw.pPDPT->a[iShw2].n.u1Present)
630 {
631 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
632 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
633 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
634 }
635 }
636 break;
637 }
638
639 case PGMPOOLKIND_64BIT_PML4:
640 {
641 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
642 /*
643 * Hopefully this doesn't happen very often:
644 * - messing with the bits of pd pointers without changing the physical address
645 */
646 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
647 const unsigned iShw = off / sizeof(X86PDPE);
648 if (uShw.pPML4->a[iShw].n.u1Present)
649 {
650 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
651 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
652 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
653 }
654 /* paranoia / a bit assumptive. */
655 if ( (off & 7)
656 && (off & 7) + cbWrite > sizeof(X86PDPE))
657 {
658 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
659 if (uShw.pPML4->a[iShw2].n.u1Present)
660 {
661 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
662 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
663 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
664 }
665 }
666 break;
667 }
668#endif /* IN_RING0 */
669
670 default:
671 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
672 }
673 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
674
675 /* next */
676 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
677 return;
678 pPage = &pPool->aPages[pPage->iMonitoredNext];
679 }
680}
681
682# ifndef IN_RING3
683
684/**
685 * Checks if a access could be a fork operation in progress.
686 *
687 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
688 *
689 * @returns true if it's likely that we're forking, otherwise false.
690 * @param pPool The pool.
691 * @param pDis The disassembled instruction.
692 * @param offFault The access offset.
693 */
694DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
695{
696 /*
697 * i386 linux is using btr to clear X86_PTE_RW.
698 * The functions involved are (2.6.16 source inspection):
699 * clear_bit
700 * ptep_set_wrprotect
701 * copy_one_pte
702 * copy_pte_range
703 * copy_pmd_range
704 * copy_pud_range
705 * copy_page_range
706 * dup_mmap
707 * dup_mm
708 * copy_mm
709 * copy_process
710 * do_fork
711 */
712 if ( pDis->pCurInstr->uOpcode == OP_BTR
713 && !(offFault & 4)
714 /** @todo Validate that the bit index is X86_PTE_RW. */
715 )
716 {
717 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
718 return true;
719 }
720 return false;
721}
722
723
724/**
725 * Determine whether the page is likely to have been reused.
726 *
727 * @returns true if we consider the page as being reused for a different purpose.
728 * @returns false if we consider it to still be a paging page.
729 * @param pVM The cross context VM structure.
730 * @param pVCpu The cross context virtual CPU structure.
731 * @param pRegFrame Trap register frame.
732 * @param pDis The disassembly info for the faulting instruction.
733 * @param pvFault The fault address.
734 *
735 * @remark The REP prefix check is left to the caller because of STOSD/W.
736 */
737DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
738{
739#ifndef IN_RC
740 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
741 if ( HMHasPendingIrq(pVM)
742 && (pRegFrame->rsp - pvFault) < 32)
743 {
744 /* Fault caused by stack writes while trying to inject an interrupt event. */
745 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
746 return true;
747 }
748#else
749 NOREF(pVM); NOREF(pvFault);
750#endif
751
752 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
753
754 /* Non-supervisor mode write means it's used for something else. */
755 if (CPUMGetGuestCPL(pVCpu) == 3)
756 return true;
757
758 switch (pDis->pCurInstr->uOpcode)
759 {
760 /* call implies the actual push of the return address faulted */
761 case OP_CALL:
762 Log4(("pgmPoolMonitorIsReused: CALL\n"));
763 return true;
764 case OP_PUSH:
765 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
766 return true;
767 case OP_PUSHF:
768 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
769 return true;
770 case OP_PUSHA:
771 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
772 return true;
773 case OP_FXSAVE:
774 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
775 return true;
776 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
777 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
778 return true;
779 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
780 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
781 return true;
782 case OP_MOVSWD:
783 case OP_STOSWD:
784 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
785 && pRegFrame->rcx >= 0x40
786 )
787 {
788 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
789
790 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
791 return true;
792 }
793 return false;
794 }
795 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
796 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
797 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
798 {
799 Log4(("pgmPoolMonitorIsReused: ESP\n"));
800 return true;
801 }
802
803 return false;
804}
805
806
807/**
808 * Flushes the page being accessed.
809 *
810 * @returns VBox status code suitable for scheduling.
811 * @param pVM The cross context VM structure.
812 * @param pVCpu The cross context virtual CPU structure.
813 * @param pPool The pool.
814 * @param pPage The pool page (head).
815 * @param pDis The disassembly of the write instruction.
816 * @param pRegFrame The trap register frame.
817 * @param GCPhysFault The fault address as guest physical address.
818 * @param pvFault The fault address.
819 * @todo VBOXSTRICTRC
820 */
821static int pgmPoolAccessPfHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
822 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
823{
824 NOREF(pVM); NOREF(GCPhysFault);
825
826 /*
827 * First, do the flushing.
828 */
829 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
830
831 /*
832 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
833 * Must do this in raw mode (!); XP boot will fail otherwise.
834 */
835 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
836 if (rc2 == VINF_SUCCESS)
837 { /* do nothing */ }
838 else if (rc2 == VINF_EM_RESCHEDULE)
839 {
840 if (rc == VINF_SUCCESS)
841 rc = VBOXSTRICTRC_VAL(rc2);
842#ifndef IN_RING3
843 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
844#endif
845 }
846 else if (rc2 == VERR_EM_INTERPRETER)
847 {
848#ifdef IN_RC
849 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
850 {
851 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
852 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->eip));
853 rc = VINF_SUCCESS;
854 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
855 }
856 else
857#endif
858 {
859 rc = VINF_EM_RAW_EMULATE_INSTR;
860 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
861 }
862 }
863 else if (RT_FAILURE_NP(rc2))
864 rc = VBOXSTRICTRC_VAL(rc2);
865 else
866 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
867
868 LogFlow(("pgmPoolAccessPfHandlerPT: returns %Rrc (flushed)\n", rc));
869 return rc;
870}
871
872
873/**
874 * Handles the STOSD write accesses.
875 *
876 * @returns VBox status code suitable for scheduling.
877 * @param pVM The cross context VM structure.
878 * @param pPool The pool.
879 * @param pPage The pool page (head).
880 * @param pDis The disassembly of the write instruction.
881 * @param pRegFrame The trap register frame.
882 * @param GCPhysFault The fault address as guest physical address.
883 * @param pvFault The fault address.
884 */
885DECLINLINE(int) pgmPoolAccessPfHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
886 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
887{
888 unsigned uIncrement = pDis->Param1.cb;
889 NOREF(pVM);
890
891 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
892 Assert(pRegFrame->rcx <= 0x20);
893
894#ifdef VBOX_STRICT
895 if (pDis->uOpMode == DISCPUMODE_32BIT)
896 Assert(uIncrement == 4);
897 else
898 Assert(uIncrement == 8);
899#endif
900
901 Log3(("pgmPoolAccessPfHandlerSTOSD\n"));
902
903 /*
904 * Increment the modification counter and insert it into the list
905 * of modified pages the first time.
906 */
907 if (!pPage->cModifications++)
908 pgmPoolMonitorModifiedInsert(pPool, pPage);
909
910 /*
911 * Execute REP STOSD.
912 *
913 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
914 * write situation, meaning that it's safe to write here.
915 */
916 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
917 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
918 while (pRegFrame->rcx)
919 {
920#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
921 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
922 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
923 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
924#else
925 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
926#endif
927#ifdef IN_RC
928 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
929#else
930 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
931#endif
932 pu32 += uIncrement;
933 GCPhysFault += uIncrement;
934 pRegFrame->rdi += uIncrement;
935 pRegFrame->rcx--;
936 }
937 pRegFrame->rip += pDis->cbInstr;
938
939 LogFlow(("pgmPoolAccessPfHandlerSTOSD: returns\n"));
940 return VINF_SUCCESS;
941}
942
943
944/**
945 * Handles the simple write accesses.
946 *
947 * @returns VBox status code suitable for scheduling.
948 * @param pVM The cross context VM structure.
949 * @param pVCpu The cross context virtual CPU structure.
950 * @param pPool The pool.
951 * @param pPage The pool page (head).
952 * @param pDis The disassembly of the write instruction.
953 * @param pRegFrame The trap register frame.
954 * @param GCPhysFault The fault address as guest physical address.
955 * @param pvFault The fault address.
956 * @param pfReused Reused state (in/out)
957 */
958DECLINLINE(int) pgmPoolAccessPfHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
959 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
960{
961 Log3(("pgmPoolAccessPfHandlerSimple\n"));
962 NOREF(pVM);
963 NOREF(pfReused); /* initialized by caller */
964
965 /*
966 * Increment the modification counter and insert it into the list
967 * of modified pages the first time.
968 */
969 if (!pPage->cModifications++)
970 pgmPoolMonitorModifiedInsert(pPool, pPage);
971
972 /*
973 * Clear all the pages. ASSUMES that pvFault is readable.
974 */
975#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
976 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
977#endif
978
979 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
980 if (cbWrite <= 8)
981 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
982 else if (cbWrite <= 16)
983 {
984 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
985 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
986 }
987 else
988 {
989 Assert(cbWrite <= 32);
990 for (uint32_t off = 0; off < cbWrite; off += 8)
991 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
992 }
993
994#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
995 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
996#endif
997
998 /*
999 * Interpret the instruction.
1000 */
1001 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
1002 if (RT_SUCCESS(rc))
1003 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1004 else if (rc == VERR_EM_INTERPRETER)
1005 {
1006 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1007 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1008 rc = VINF_EM_RAW_EMULATE_INSTR;
1009 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1010 }
1011
1012#if 0 /* experimental code */
1013 if (rc == VINF_SUCCESS)
1014 {
1015 switch (pPage->enmKind)
1016 {
1017 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1018 {
1019 X86PTEPAE GstPte;
1020 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1021 AssertRC(rc);
1022
1023 /* Check the new value written by the guest. If present and with a bogus physical address, then
1024 * it's fairly safe to assume the guest is reusing the PT.
1025 */
1026 if (GstPte.n.u1Present)
1027 {
1028 RTHCPHYS HCPhys = -1;
1029 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1030 if (rc != VINF_SUCCESS)
1031 {
1032 *pfReused = true;
1033 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1034 }
1035 }
1036 break;
1037 }
1038 }
1039 }
1040#endif
1041
1042 LogFlow(("pgmPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1043 return VBOXSTRICTRC_VAL(rc);
1044}
1045
1046
1047/**
1048 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1049 * \#PF access handler callback for page table pages.}
1050 *
1051 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
1052 */
1053DECLEXPORT(VBOXSTRICTRC) pgmPoolAccessPfHandler(PVM pVM, PVMCPU pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
1054 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1055{
1056 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1057 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1058 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1059 unsigned cMaxModifications;
1060 bool fForcedFlush = false;
1061 NOREF(uErrorCode);
1062
1063 LogFlow(("pgmPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1064
1065 pgmLock(pVM);
1066 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1067 {
1068 /* Pool page changed while we were waiting for the lock; ignore. */
1069 Log(("CPU%d: pgmPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1070 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1071 pgmUnlock(pVM);
1072 return VINF_SUCCESS;
1073 }
1074#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1075 if (pPage->fDirty)
1076 {
1077 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1078 pgmUnlock(pVM);
1079 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1080 }
1081#endif
1082
1083#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1084 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1085 {
1086 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1087 void *pvGst;
1088 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1089 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1090 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1091 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1092 }
1093#endif
1094
1095 /*
1096 * Disassemble the faulting instruction.
1097 */
1098 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1099 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1100 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1101 {
1102 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1103 pgmUnlock(pVM);
1104 return rc;
1105 }
1106
1107 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1108
1109 /*
1110 * We should ALWAYS have the list head as user parameter. This
1111 * is because we use that page to record the changes.
1112 */
1113 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1114
1115#ifdef IN_RING0
1116 /* Maximum nr of modifications depends on the page type. */
1117 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1118 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1119 cMaxModifications = 4;
1120 else
1121 cMaxModifications = 24;
1122#else
1123 cMaxModifications = 48;
1124#endif
1125
1126 /*
1127 * Incremental page table updates should weigh more than random ones.
1128 * (Only applies when started from offset 0)
1129 */
1130 pVCpu->pgm.s.cPoolAccessHandler++;
1131 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1132 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1133 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1134 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1135 {
1136 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1137 Assert(pPage->cModifications < 32000);
1138 pPage->cModifications = pPage->cModifications * 2;
1139 pPage->GCPtrLastAccessHandlerFault = pvFault;
1140 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1141 if (pPage->cModifications >= cMaxModifications)
1142 {
1143 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1144 fForcedFlush = true;
1145 }
1146 }
1147
1148 if (pPage->cModifications >= cMaxModifications)
1149 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1150
1151 /*
1152 * Check if it's worth dealing with.
1153 */
1154 bool fReused = false;
1155 bool fNotReusedNotForking = false;
1156 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1157 || pgmPoolIsPageLocked(pPage)
1158 )
1159 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1160 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1161 {
1162 /*
1163 * Simple instructions, no REP prefix.
1164 */
1165 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1166 {
1167 rc = pgmPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1168 if (fReused)
1169 goto flushPage;
1170
1171 /* A mov instruction to change the first page table entry will be remembered so we can detect
1172 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1173 */
1174 if ( rc == VINF_SUCCESS
1175 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1176 && pDis->pCurInstr->uOpcode == OP_MOV
1177 && (pvFault & PAGE_OFFSET_MASK) == 0)
1178 {
1179 pPage->GCPtrLastAccessHandlerFault = pvFault;
1180 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1181 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1182 /* Make sure we don't kick out a page too quickly. */
1183 if (pPage->cModifications > 8)
1184 pPage->cModifications = 2;
1185 }
1186 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1187 {
1188 /* ignore the 2nd write to this page table entry. */
1189 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1190 }
1191 else
1192 {
1193 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1194 pPage->GCPtrLastAccessHandlerRip = 0;
1195 }
1196
1197 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1198 pgmUnlock(pVM);
1199 return rc;
1200 }
1201
1202 /*
1203 * Windows is frequently doing small memset() operations (netio test 4k+).
1204 * We have to deal with these or we'll kill the cache and performance.
1205 */
1206 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1207 && !pRegFrame->eflags.Bits.u1DF
1208 && pDis->uOpMode == pDis->uCpuMode
1209 && pDis->uAddrMode == pDis->uCpuMode)
1210 {
1211 bool fValidStosd = false;
1212
1213 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1214 && pDis->fPrefix == DISPREFIX_REP
1215 && pRegFrame->ecx <= 0x20
1216 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1217 && !((uintptr_t)pvFault & 3)
1218 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1219 )
1220 {
1221 fValidStosd = true;
1222 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1223 }
1224 else
1225 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1226 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1227 && pRegFrame->rcx <= 0x20
1228 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1229 && !((uintptr_t)pvFault & 7)
1230 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1231 )
1232 {
1233 fValidStosd = true;
1234 }
1235
1236 if (fValidStosd)
1237 {
1238 rc = pgmPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1239 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1240 pgmUnlock(pVM);
1241 return rc;
1242 }
1243 }
1244
1245 /* REP prefix, don't bother. */
1246 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1247 Log4(("pgmPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1248 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1249 fNotReusedNotForking = true;
1250 }
1251
1252#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1253 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1254 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1255 */
1256 if ( pPage->cModifications >= cMaxModifications
1257 && !fForcedFlush
1258 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1259 && ( fNotReusedNotForking
1260 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1261 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1262 )
1263 )
1264 {
1265 Assert(!pgmPoolIsPageLocked(pPage));
1266 Assert(pPage->fDirty == false);
1267
1268 /* Flush any monitored duplicates as we will disable write protection. */
1269 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1270 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1271 {
1272 PPGMPOOLPAGE pPageHead = pPage;
1273
1274 /* Find the monitor head. */
1275 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1276 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1277
1278 while (pPageHead)
1279 {
1280 unsigned idxNext = pPageHead->iMonitoredNext;
1281
1282 if (pPageHead != pPage)
1283 {
1284 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1285 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1286 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1287 AssertRC(rc2);
1288 }
1289
1290 if (idxNext == NIL_PGMPOOL_IDX)
1291 break;
1292
1293 pPageHead = &pPool->aPages[idxNext];
1294 }
1295 }
1296
1297 /* The flushing above might fail for locked pages, so double check. */
1298 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1299 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1300 {
1301 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1302
1303 /* Temporarily allow write access to the page table again. */
1304 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1305 if (rc == VINF_SUCCESS)
1306 {
1307 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1308 AssertMsg(rc == VINF_SUCCESS
1309 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1310 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1311 || rc == VERR_PAGE_NOT_PRESENT,
1312 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1313# ifdef VBOX_STRICT
1314 pPage->GCPtrDirtyFault = pvFault;
1315# endif
1316
1317 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1318 pgmUnlock(pVM);
1319 return rc;
1320 }
1321 }
1322 }
1323#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1324
1325 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1326flushPage:
1327 /*
1328 * Not worth it, so flush it.
1329 *
1330 * If we considered it to be reused, don't go back to ring-3
1331 * to emulate failed instructions since we usually cannot
1332 * interpret then. This may be a bit risky, in which case
1333 * the reuse detection must be fixed.
1334 */
1335 rc = pgmPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1336 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1337 && fReused)
1338 {
1339 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1340 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1341 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1342 }
1343 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1344 pgmUnlock(pVM);
1345 return rc;
1346}
1347
1348# endif /* !IN_RING3 */
1349
1350/**
1351 * @callback_method_impl{FNPGMPHYSHANDLER,
1352 * Access handler for shadowed page table pages.}
1353 *
1354 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1355 */
1356PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1357pgmPoolAccessHandler(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1358 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1359{
1360 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1361 STAM_PROFILE_START(&pPool->StatMonitorR3, a);
1362 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1363 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1364 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1365
1366 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1367
1368 /*
1369 * Make sure the pool page wasn't modified by a different CPU.
1370 */
1371 pgmLock(pVM);
1372 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1373 {
1374 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1375
1376 /* The max modification count before flushing depends on the context and page type. */
1377#ifdef IN_RING3
1378 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1379#else
1380 uint16_t cMaxModifications;
1381 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1382 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1383 cMaxModifications = 4;
1384 else
1385 cMaxModifications = 24;
1386# ifdef IN_RC
1387 cMaxModifications *= 2; /* traps are cheaper than exists. */
1388# endif
1389#endif
1390
1391 /*
1392 * We don't have to be very sophisticated about this since there are relativly few calls here.
1393 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1394 */
1395 if ( ( pPage->cModifications < cMaxModifications
1396 || pgmPoolIsPageLocked(pPage) )
1397 && enmOrigin != PGMACCESSORIGIN_DEVICE
1398 && cbBuf <= 16)
1399 {
1400 /* Clear the shadow entry. */
1401 if (!pPage->cModifications++)
1402 pgmPoolMonitorModifiedInsert(pPool, pPage);
1403
1404 if (cbBuf <= 8)
1405 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1406 else
1407 {
1408 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1409 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1410 }
1411 }
1412 else
1413 {
1414 /* ASSUME that VERR_PGM_POOL_CLEARED can be ignored here and that FFs will deal with it in due time. */
1415 pgmPoolMonitorChainFlush(pPool, pPage);
1416 }
1417
1418 STAM_PROFILE_STOP_EX(&pPool->StatMonitorR3, &pPool->StatMonitorR3FlushPage, a);
1419 }
1420 else
1421 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1422 pgmUnlock(pVM);
1423 return VINF_PGM_HANDLER_DO_DEFAULT;
1424}
1425
1426
1427# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1428
1429# if defined(VBOX_STRICT) && !defined(IN_RING3)
1430
1431/**
1432 * Check references to guest physical memory in a PAE / PAE page table.
1433 *
1434 * @param pPool The pool.
1435 * @param pPage The page.
1436 * @param pShwPT The shadow page table (mapping of the page).
1437 * @param pGstPT The guest page table.
1438 */
1439static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1440{
1441 unsigned cErrors = 0;
1442 int LastRc = -1; /* initialized to shut up gcc */
1443 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1444 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1445 PVM pVM = pPool->CTX_SUFF(pVM);
1446
1447#ifdef VBOX_STRICT
1448 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1449 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1450#endif
1451 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1452 {
1453 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1454 {
1455 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1456 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1457 if ( rc != VINF_SUCCESS
1458 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1459 {
1460 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1461 LastPTE = i;
1462 LastRc = rc;
1463 LastHCPhys = HCPhys;
1464 cErrors++;
1465
1466 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1467 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1468 AssertRC(rc);
1469
1470 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1471 {
1472 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1473
1474 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1475 {
1476 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1477
1478 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1479 {
1480 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1481 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1482 {
1483 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1484 }
1485 }
1486
1487 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1488 }
1489 }
1490 }
1491 }
1492 }
1493 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1494}
1495
1496
1497/**
1498 * Check references to guest physical memory in a PAE / 32-bit page table.
1499 *
1500 * @param pPool The pool.
1501 * @param pPage The page.
1502 * @param pShwPT The shadow page table (mapping of the page).
1503 * @param pGstPT The guest page table.
1504 */
1505static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1506{
1507 unsigned cErrors = 0;
1508 int LastRc = -1; /* initialized to shut up gcc */
1509 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1510 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1511 PVM pVM = pPool->CTX_SUFF(pVM);
1512
1513#ifdef VBOX_STRICT
1514 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1515 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1516#endif
1517 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1518 {
1519 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1520 {
1521 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1522 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1523 if ( rc != VINF_SUCCESS
1524 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1525 {
1526 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1527 LastPTE = i;
1528 LastRc = rc;
1529 LastHCPhys = HCPhys;
1530 cErrors++;
1531
1532 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1533 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1534 AssertRC(rc);
1535
1536 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1537 {
1538 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1539
1540 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1541 {
1542 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1543
1544 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1545 {
1546 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1547 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1548 {
1549 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1550 }
1551 }
1552
1553 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1554 }
1555 }
1556 }
1557 }
1558 }
1559 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1560}
1561
1562# endif /* VBOX_STRICT && !IN_RING3 */
1563
1564/**
1565 * Clear references to guest physical memory in a PAE / PAE page table.
1566 *
1567 * @returns nr of changed PTEs
1568 * @param pPool The pool.
1569 * @param pPage The page.
1570 * @param pShwPT The shadow page table (mapping of the page).
1571 * @param pGstPT The guest page table.
1572 * @param pOldGstPT The old cached guest page table.
1573 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1574 * @param pfFlush Flush reused page table (out)
1575 */
1576DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1577 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1578{
1579 unsigned cChanged = 0;
1580
1581#ifdef VBOX_STRICT
1582 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1583 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1584#endif
1585 *pfFlush = false;
1586
1587 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1588 {
1589 /* Check the new value written by the guest. If present and with a bogus physical address, then
1590 * it's fairly safe to assume the guest is reusing the PT.
1591 */
1592 if ( fAllowRemoval
1593 && pGstPT->a[i].n.u1Present)
1594 {
1595 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1596 {
1597 *pfFlush = true;
1598 return ++cChanged;
1599 }
1600 }
1601 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1602 {
1603 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1604 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1605 {
1606#ifdef VBOX_STRICT
1607 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1608 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1609 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1610#endif
1611 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1612 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1613 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1614 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1615
1616 if ( uHostAttr == uGuestAttr
1617 && fHostRW <= fGuestRW)
1618 continue;
1619 }
1620 cChanged++;
1621 /* Something was changed, so flush it. */
1622 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1623 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1624 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1625 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1626 }
1627 }
1628 return cChanged;
1629}
1630
1631
1632/**
1633 * Clear references to guest physical memory in a PAE / PAE page table.
1634 *
1635 * @returns nr of changed PTEs
1636 * @param pPool The pool.
1637 * @param pPage The page.
1638 * @param pShwPT The shadow page table (mapping of the page).
1639 * @param pGstPT The guest page table.
1640 * @param pOldGstPT The old cached guest page table.
1641 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1642 * @param pfFlush Flush reused page table (out)
1643 */
1644DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1645 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1646{
1647 unsigned cChanged = 0;
1648
1649#ifdef VBOX_STRICT
1650 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1651 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1652#endif
1653 *pfFlush = false;
1654
1655 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1656 {
1657 /* Check the new value written by the guest. If present and with a bogus physical address, then
1658 * it's fairly safe to assume the guest is reusing the PT.
1659 */
1660 if ( fAllowRemoval
1661 && pGstPT->a[i].n.u1Present)
1662 {
1663 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1664 {
1665 *pfFlush = true;
1666 return ++cChanged;
1667 }
1668 }
1669 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1670 {
1671 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1672 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1673 {
1674#ifdef VBOX_STRICT
1675 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1676 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1677 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1678#endif
1679 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1680 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1681 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1682 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1683
1684 if ( uHostAttr == uGuestAttr
1685 && fHostRW <= fGuestRW)
1686 continue;
1687 }
1688 cChanged++;
1689 /* Something was changed, so flush it. */
1690 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1691 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1692 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1693 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1694 }
1695 }
1696 return cChanged;
1697}
1698
1699
1700/**
1701 * Flush a dirty page
1702 *
1703 * @param pVM The cross context VM structure.
1704 * @param pPool The pool.
1705 * @param idxSlot Dirty array slot index
1706 * @param fAllowRemoval Allow a reused page table to be removed
1707 */
1708static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1709{
1710 PPGMPOOLPAGE pPage;
1711 unsigned idxPage;
1712
1713 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1714 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1715 return;
1716
1717 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1718 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1719 pPage = &pPool->aPages[idxPage];
1720 Assert(pPage->idx == idxPage);
1721 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1722
1723 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1724 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1725
1726#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1727 PVMCPU pVCpu = VMMGetCpu(pVM);
1728 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1729#endif
1730
1731 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1732 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1733 Assert(rc == VINF_SUCCESS);
1734 pPage->fDirty = false;
1735
1736#ifdef VBOX_STRICT
1737 uint64_t fFlags = 0;
1738 RTHCPHYS HCPhys;
1739 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1740 AssertMsg( ( rc == VINF_SUCCESS
1741 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1742 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1743 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1744 || rc == VERR_PAGE_NOT_PRESENT,
1745 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1746#endif
1747
1748 /* Flush those PTEs that have changed. */
1749 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1750 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1751 void *pvGst;
1752 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1753 bool fFlush;
1754 unsigned cChanges;
1755
1756 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1757 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1758 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1759 else
1760 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1761 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1762
1763 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1764 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1765 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1766 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1767
1768 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1769 Assert(pPage->cModifications);
1770 if (cChanges < 4)
1771 pPage->cModifications = 1; /* must use > 0 here */
1772 else
1773 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1774
1775 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1776 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1777 pPool->idxFreeDirtyPage = idxSlot;
1778
1779 pPool->cDirtyPages--;
1780 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1781 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1782 if (fFlush)
1783 {
1784 Assert(fAllowRemoval);
1785 Log(("Flush reused page table!\n"));
1786 pgmPoolFlushPage(pPool, pPage);
1787 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1788 }
1789 else
1790 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1791
1792#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1793 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1794#endif
1795}
1796
1797
1798# ifndef IN_RING3
1799/**
1800 * Add a new dirty page
1801 *
1802 * @param pVM The cross context VM structure.
1803 * @param pPool The pool.
1804 * @param pPage The page.
1805 */
1806void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1807{
1808 unsigned idxFree;
1809
1810 PGM_LOCK_ASSERT_OWNER(pVM);
1811 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1812 Assert(!pPage->fDirty);
1813
1814 idxFree = pPool->idxFreeDirtyPage;
1815 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1816 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1817
1818 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1819 {
1820 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1821 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1822 }
1823 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1824 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1825
1826 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1827
1828 /*
1829 * Make a copy of the guest page table as we require valid GCPhys addresses
1830 * when removing references to physical pages.
1831 * (The HCPhys linear lookup is *extremely* expensive!)
1832 */
1833 void *pvGst;
1834 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1835 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1836# ifdef VBOX_STRICT
1837 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1838 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1839 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1840 else
1841 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1842 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1843# endif
1844 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1845
1846 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1847 pPage->fDirty = true;
1848 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1849 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1850 pPool->cDirtyPages++;
1851
1852 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1853 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1854 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1855 {
1856 unsigned i;
1857 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1858 {
1859 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1860 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1861 {
1862 pPool->idxFreeDirtyPage = idxFree;
1863 break;
1864 }
1865 }
1866 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1867 }
1868
1869 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1870
1871 /*
1872 * Clear all references to this shadow table. See @bugref{7298}.
1873 */
1874 pgmPoolTrackClearPageUsers(pPool, pPage);
1875}
1876# endif /* !IN_RING3 */
1877
1878
1879/**
1880 * Check if the specified page is dirty (not write monitored)
1881 *
1882 * @return dirty or not
1883 * @param pVM The cross context VM structure.
1884 * @param GCPhys Guest physical address
1885 */
1886bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1887{
1888 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1889 PGM_LOCK_ASSERT_OWNER(pVM);
1890 if (!pPool->cDirtyPages)
1891 return false;
1892
1893 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1894
1895 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1896 {
1897 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1898 {
1899 PPGMPOOLPAGE pPage;
1900 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1901
1902 pPage = &pPool->aPages[idxPage];
1903 if (pPage->GCPhys == GCPhys)
1904 return true;
1905 }
1906 }
1907 return false;
1908}
1909
1910
1911/**
1912 * Reset all dirty pages by reinstating page monitoring.
1913 *
1914 * @param pVM The cross context VM structure.
1915 */
1916void pgmPoolResetDirtyPages(PVM pVM)
1917{
1918 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1919 PGM_LOCK_ASSERT_OWNER(pVM);
1920 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1921
1922 if (!pPool->cDirtyPages)
1923 return;
1924
1925 Log(("pgmPoolResetDirtyPages\n"));
1926 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1927 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1928
1929 pPool->idxFreeDirtyPage = 0;
1930 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1931 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1932 {
1933 unsigned i;
1934 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1935 {
1936 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1937 {
1938 pPool->idxFreeDirtyPage = i;
1939 break;
1940 }
1941 }
1942 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1943 }
1944
1945 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1946 return;
1947}
1948
1949
1950/**
1951 * Invalidate the PT entry for the specified page
1952 *
1953 * @param pVM The cross context VM structure.
1954 * @param GCPtrPage Guest page to invalidate
1955 */
1956void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1957{
1958 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1959 PGM_LOCK_ASSERT_OWNER(pVM);
1960 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1961
1962 if (!pPool->cDirtyPages)
1963 return;
1964
1965 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
1966 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1967 {
1968 }
1969}
1970
1971
1972/**
1973 * Reset all dirty pages by reinstating page monitoring.
1974 *
1975 * @param pVM The cross context VM structure.
1976 * @param GCPhysPT Physical address of the page table
1977 */
1978void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1979{
1980 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1981 PGM_LOCK_ASSERT_OWNER(pVM);
1982 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1983 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1984
1985 if (!pPool->cDirtyPages)
1986 return;
1987
1988 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1989
1990 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1991 {
1992 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1993 {
1994 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1995
1996 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1997 if (pPage->GCPhys == GCPhysPT)
1998 {
1999 idxDirtyPage = i;
2000 break;
2001 }
2002 }
2003 }
2004
2005 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2006 {
2007 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2008 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2009 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
2010 {
2011 unsigned i;
2012 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2013 {
2014 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2015 {
2016 pPool->idxFreeDirtyPage = i;
2017 break;
2018 }
2019 }
2020 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2021 }
2022 }
2023}
2024
2025# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2026
2027/**
2028 * Inserts a page into the GCPhys hash table.
2029 *
2030 * @param pPool The pool.
2031 * @param pPage The page.
2032 */
2033DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2034{
2035 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2036 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2037 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2038 pPage->iNext = pPool->aiHash[iHash];
2039 pPool->aiHash[iHash] = pPage->idx;
2040}
2041
2042
2043/**
2044 * Removes a page from the GCPhys hash table.
2045 *
2046 * @param pPool The pool.
2047 * @param pPage The page.
2048 */
2049DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2050{
2051 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2052 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2053 if (pPool->aiHash[iHash] == pPage->idx)
2054 pPool->aiHash[iHash] = pPage->iNext;
2055 else
2056 {
2057 uint16_t iPrev = pPool->aiHash[iHash];
2058 for (;;)
2059 {
2060 const int16_t i = pPool->aPages[iPrev].iNext;
2061 if (i == pPage->idx)
2062 {
2063 pPool->aPages[iPrev].iNext = pPage->iNext;
2064 break;
2065 }
2066 if (i == NIL_PGMPOOL_IDX)
2067 {
2068 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2069 break;
2070 }
2071 iPrev = i;
2072 }
2073 }
2074 pPage->iNext = NIL_PGMPOOL_IDX;
2075}
2076
2077
2078/**
2079 * Frees up one cache page.
2080 *
2081 * @returns VBox status code.
2082 * @retval VINF_SUCCESS on success.
2083 * @param pPool The pool.
2084 * @param iUser The user index.
2085 */
2086static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2087{
2088#ifndef IN_RC
2089 const PVM pVM = pPool->CTX_SUFF(pVM);
2090#endif
2091 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
2092 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2093
2094 /*
2095 * Select one page from the tail of the age list.
2096 */
2097 PPGMPOOLPAGE pPage;
2098 for (unsigned iLoop = 0; ; iLoop++)
2099 {
2100 uint16_t iToFree = pPool->iAgeTail;
2101 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2102 iToFree = pPool->aPages[iToFree].iAgePrev;
2103/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2104 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2105 {
2106 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2107 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2108 {
2109 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2110 continue;
2111 iToFree = i;
2112 break;
2113 }
2114 }
2115*/
2116 Assert(iToFree != iUser);
2117 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2118 pPage = &pPool->aPages[iToFree];
2119
2120 /*
2121 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2122 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2123 */
2124 if ( !pgmPoolIsPageLocked(pPage)
2125 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2126 break;
2127 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2128 pgmPoolCacheUsed(pPool, pPage);
2129 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2130 }
2131
2132 /*
2133 * Found a usable page, flush it and return.
2134 */
2135 int rc = pgmPoolFlushPage(pPool, pPage);
2136 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2137 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2138 if (rc == VINF_SUCCESS)
2139 PGM_INVL_ALL_VCPU_TLBS(pVM);
2140 return rc;
2141}
2142
2143
2144/**
2145 * Checks if a kind mismatch is really a page being reused
2146 * or if it's just normal remappings.
2147 *
2148 * @returns true if reused and the cached page (enmKind1) should be flushed
2149 * @returns false if not reused.
2150 * @param enmKind1 The kind of the cached page.
2151 * @param enmKind2 The kind of the requested page.
2152 */
2153static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2154{
2155 switch (enmKind1)
2156 {
2157 /*
2158 * Never reuse them. There is no remapping in non-paging mode.
2159 */
2160 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2161 case PGMPOOLKIND_32BIT_PD_PHYS:
2162 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2163 case PGMPOOLKIND_PAE_PD_PHYS:
2164 case PGMPOOLKIND_PAE_PDPT_PHYS:
2165 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2166 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2167 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2168 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2169 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2170 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2171 return false;
2172
2173 /*
2174 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2175 */
2176 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2177 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2178 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2179 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2180 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2181 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2182 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2183 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2184 case PGMPOOLKIND_32BIT_PD:
2185 case PGMPOOLKIND_PAE_PDPT:
2186 switch (enmKind2)
2187 {
2188 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2189 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2190 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2191 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2192 case PGMPOOLKIND_64BIT_PML4:
2193 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2194 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2195 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2196 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2197 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2198 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2199 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2200 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2201 return true;
2202 default:
2203 return false;
2204 }
2205
2206 /*
2207 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2208 */
2209 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2210 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2211 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2212 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2213 case PGMPOOLKIND_64BIT_PML4:
2214 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2215 switch (enmKind2)
2216 {
2217 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2218 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2219 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2220 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2221 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2222 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2223 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2224 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2225 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2226 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2227 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2228 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2229 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2230 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2231 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2232 return true;
2233 default:
2234 return false;
2235 }
2236
2237 /*
2238 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2239 */
2240 case PGMPOOLKIND_ROOT_NESTED:
2241 return false;
2242
2243 default:
2244 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2245 }
2246}
2247
2248
2249/**
2250 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2251 *
2252 * @returns VBox status code.
2253 * @retval VINF_PGM_CACHED_PAGE on success.
2254 * @retval VERR_FILE_NOT_FOUND if not found.
2255 * @param pPool The pool.
2256 * @param GCPhys The GC physical address of the page we're gonna shadow.
2257 * @param enmKind The kind of mapping.
2258 * @param enmAccess Access type for the mapping (only relevant for big pages)
2259 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2260 * @param iUser The shadow page pool index of the user table. This is
2261 * NIL_PGMPOOL_IDX for root pages.
2262 * @param iUserTable The index into the user table (shadowed). Ignored if
2263 * root page
2264 * @param ppPage Where to store the pointer to the page.
2265 */
2266static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2267 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2268{
2269 /*
2270 * Look up the GCPhys in the hash.
2271 */
2272 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2273 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2274 if (i != NIL_PGMPOOL_IDX)
2275 {
2276 do
2277 {
2278 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2279 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2280 if (pPage->GCPhys == GCPhys)
2281 {
2282 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2283 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2284 && pPage->fA20Enabled == fA20Enabled)
2285 {
2286 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2287 * doesn't flush it in case there are no more free use records.
2288 */
2289 pgmPoolCacheUsed(pPool, pPage);
2290
2291 int rc = VINF_SUCCESS;
2292 if (iUser != NIL_PGMPOOL_IDX)
2293 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2294 if (RT_SUCCESS(rc))
2295 {
2296 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2297 *ppPage = pPage;
2298 if (pPage->cModifications)
2299 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2300 STAM_COUNTER_INC(&pPool->StatCacheHits);
2301 return VINF_PGM_CACHED_PAGE;
2302 }
2303 return rc;
2304 }
2305
2306 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2307 {
2308 /*
2309 * The kind is different. In some cases we should now flush the page
2310 * as it has been reused, but in most cases this is normal remapping
2311 * of PDs as PT or big pages using the GCPhys field in a slightly
2312 * different way than the other kinds.
2313 */
2314 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2315 {
2316 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2317 pgmPoolFlushPage(pPool, pPage);
2318 break;
2319 }
2320 }
2321 }
2322
2323 /* next */
2324 i = pPage->iNext;
2325 } while (i != NIL_PGMPOOL_IDX);
2326 }
2327
2328 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2329 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2330 return VERR_FILE_NOT_FOUND;
2331}
2332
2333
2334/**
2335 * Inserts a page into the cache.
2336 *
2337 * @param pPool The pool.
2338 * @param pPage The cached page.
2339 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2340 */
2341static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2342{
2343 /*
2344 * Insert into the GCPhys hash if the page is fit for that.
2345 */
2346 Assert(!pPage->fCached);
2347 if (fCanBeCached)
2348 {
2349 pPage->fCached = true;
2350 pgmPoolHashInsert(pPool, pPage);
2351 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2352 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2353 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2354 }
2355 else
2356 {
2357 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2358 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2359 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2360 }
2361
2362 /*
2363 * Insert at the head of the age list.
2364 */
2365 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2366 pPage->iAgeNext = pPool->iAgeHead;
2367 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2368 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2369 else
2370 pPool->iAgeTail = pPage->idx;
2371 pPool->iAgeHead = pPage->idx;
2372}
2373
2374
2375/**
2376 * Flushes a cached page.
2377 *
2378 * @param pPool The pool.
2379 * @param pPage The cached page.
2380 */
2381static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2382{
2383 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2384
2385 /*
2386 * Remove the page from the hash.
2387 */
2388 if (pPage->fCached)
2389 {
2390 pPage->fCached = false;
2391 pgmPoolHashRemove(pPool, pPage);
2392 }
2393 else
2394 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2395
2396 /*
2397 * Remove it from the age list.
2398 */
2399 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2400 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2401 else
2402 pPool->iAgeTail = pPage->iAgePrev;
2403 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2404 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2405 else
2406 pPool->iAgeHead = pPage->iAgeNext;
2407 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2408 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2409}
2410
2411
2412/**
2413 * Looks for pages sharing the monitor.
2414 *
2415 * @returns Pointer to the head page.
2416 * @returns NULL if not found.
2417 * @param pPool The Pool
2418 * @param pNewPage The page which is going to be monitored.
2419 */
2420static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2421{
2422 /*
2423 * Look up the GCPhys in the hash.
2424 */
2425 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2426 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2427 if (i == NIL_PGMPOOL_IDX)
2428 return NULL;
2429 do
2430 {
2431 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2432 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2433 && pPage != pNewPage)
2434 {
2435 switch (pPage->enmKind)
2436 {
2437 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2438 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2439 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2440 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2441 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2442 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2443 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2444 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2445 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2446 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2447 case PGMPOOLKIND_64BIT_PML4:
2448 case PGMPOOLKIND_32BIT_PD:
2449 case PGMPOOLKIND_PAE_PDPT:
2450 {
2451 /* find the head */
2452 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2453 {
2454 Assert(pPage->iMonitoredPrev != pPage->idx);
2455 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2456 }
2457 return pPage;
2458 }
2459
2460 /* ignore, no monitoring. */
2461 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2462 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2463 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2464 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2465 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2466 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2467 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2468 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2469 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2470 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2471 case PGMPOOLKIND_ROOT_NESTED:
2472 case PGMPOOLKIND_PAE_PD_PHYS:
2473 case PGMPOOLKIND_PAE_PDPT_PHYS:
2474 case PGMPOOLKIND_32BIT_PD_PHYS:
2475 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2476 break;
2477 default:
2478 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2479 }
2480 }
2481
2482 /* next */
2483 i = pPage->iNext;
2484 } while (i != NIL_PGMPOOL_IDX);
2485 return NULL;
2486}
2487
2488
2489/**
2490 * Enabled write monitoring of a guest page.
2491 *
2492 * @returns VBox status code.
2493 * @retval VINF_SUCCESS on success.
2494 * @param pPool The pool.
2495 * @param pPage The cached page.
2496 */
2497static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2498{
2499 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2500
2501 /*
2502 * Filter out the relevant kinds.
2503 */
2504 switch (pPage->enmKind)
2505 {
2506 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2507 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2508 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2509 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2510 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2511 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2512 case PGMPOOLKIND_64BIT_PML4:
2513 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2514 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2515 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2516 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2517 case PGMPOOLKIND_32BIT_PD:
2518 case PGMPOOLKIND_PAE_PDPT:
2519 break;
2520
2521 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2522 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2523 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2524 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2525 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2526 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2527 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2528 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2529 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2530 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2531 case PGMPOOLKIND_ROOT_NESTED:
2532 /* Nothing to monitor here. */
2533 return VINF_SUCCESS;
2534
2535 case PGMPOOLKIND_32BIT_PD_PHYS:
2536 case PGMPOOLKIND_PAE_PDPT_PHYS:
2537 case PGMPOOLKIND_PAE_PD_PHYS:
2538 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2539 /* Nothing to monitor here. */
2540 return VINF_SUCCESS;
2541 default:
2542 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2543 }
2544
2545 /*
2546 * Install handler.
2547 */
2548 int rc;
2549 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2550 if (pPageHead)
2551 {
2552 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2553 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2554
2555#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2556 if (pPageHead->fDirty)
2557 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2558#endif
2559
2560 pPage->iMonitoredPrev = pPageHead->idx;
2561 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2562 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2563 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2564 pPageHead->iMonitoredNext = pPage->idx;
2565 rc = VINF_SUCCESS;
2566 }
2567 else
2568 {
2569 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2570 PVM pVM = pPool->CTX_SUFF(pVM);
2571 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2572 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2573 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2574 NIL_RTR3PTR /*pszDesc*/);
2575 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2576 * the heap size should suffice. */
2577 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2578 PVMCPU pVCpu = VMMGetCpu(pVM);
2579 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2580 }
2581 pPage->fMonitored = true;
2582 return rc;
2583}
2584
2585
2586/**
2587 * Disables write monitoring of a guest page.
2588 *
2589 * @returns VBox status code.
2590 * @retval VINF_SUCCESS on success.
2591 * @param pPool The pool.
2592 * @param pPage The cached page.
2593 */
2594static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2595{
2596 /*
2597 * Filter out the relevant kinds.
2598 */
2599 switch (pPage->enmKind)
2600 {
2601 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2602 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2603 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2604 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2605 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2606 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2607 case PGMPOOLKIND_64BIT_PML4:
2608 case PGMPOOLKIND_32BIT_PD:
2609 case PGMPOOLKIND_PAE_PDPT:
2610 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2611 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2612 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2613 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2614 break;
2615
2616 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2617 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2618 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2619 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2620 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2621 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2622 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2623 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2624 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2625 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2626 case PGMPOOLKIND_ROOT_NESTED:
2627 case PGMPOOLKIND_PAE_PD_PHYS:
2628 case PGMPOOLKIND_PAE_PDPT_PHYS:
2629 case PGMPOOLKIND_32BIT_PD_PHYS:
2630 /* Nothing to monitor here. */
2631 Assert(!pPage->fMonitored);
2632 return VINF_SUCCESS;
2633
2634 default:
2635 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2636 }
2637 Assert(pPage->fMonitored);
2638
2639 /*
2640 * Remove the page from the monitored list or uninstall it if last.
2641 */
2642 const PVM pVM = pPool->CTX_SUFF(pVM);
2643 int rc;
2644 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2645 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2646 {
2647 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2648 {
2649 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2650 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2651 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, MMHyperCCToR3(pVM, pNewHead),
2652 MMHyperCCToR0(pVM, pNewHead), MMHyperCCToRC(pVM, pNewHead));
2653
2654 AssertFatalRCSuccess(rc);
2655 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2656 }
2657 else
2658 {
2659 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2660 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2661 {
2662 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2663 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2664 }
2665 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2666 rc = VINF_SUCCESS;
2667 }
2668 }
2669 else
2670 {
2671 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2672 AssertFatalRC(rc);
2673 PVMCPU pVCpu = VMMGetCpu(pVM);
2674 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2675 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2676 }
2677 pPage->fMonitored = false;
2678
2679 /*
2680 * Remove it from the list of modified pages (if in it).
2681 */
2682 pgmPoolMonitorModifiedRemove(pPool, pPage);
2683
2684 return rc;
2685}
2686
2687
2688/**
2689 * Inserts the page into the list of modified pages.
2690 *
2691 * @param pPool The pool.
2692 * @param pPage The page.
2693 */
2694void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2695{
2696 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2697 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2698 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2699 && pPool->iModifiedHead != pPage->idx,
2700 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2701 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2702 pPool->iModifiedHead, pPool->cModifiedPages));
2703
2704 pPage->iModifiedNext = pPool->iModifiedHead;
2705 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2706 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2707 pPool->iModifiedHead = pPage->idx;
2708 pPool->cModifiedPages++;
2709#ifdef VBOX_WITH_STATISTICS
2710 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2711 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2712#endif
2713}
2714
2715
2716/**
2717 * Removes the page from the list of modified pages and resets the
2718 * modification counter.
2719 *
2720 * @param pPool The pool.
2721 * @param pPage The page which is believed to be in the list of modified pages.
2722 */
2723static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2724{
2725 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2726 if (pPool->iModifiedHead == pPage->idx)
2727 {
2728 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2729 pPool->iModifiedHead = pPage->iModifiedNext;
2730 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2731 {
2732 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2733 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2734 }
2735 pPool->cModifiedPages--;
2736 }
2737 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2738 {
2739 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2740 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2741 {
2742 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2743 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2744 }
2745 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2746 pPool->cModifiedPages--;
2747 }
2748 else
2749 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2750 pPage->cModifications = 0;
2751}
2752
2753
2754/**
2755 * Zaps the list of modified pages, resetting their modification counters in the process.
2756 *
2757 * @param pVM The cross context VM structure.
2758 */
2759static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2760{
2761 pgmLock(pVM);
2762 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2763 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2764
2765 unsigned cPages = 0; NOREF(cPages);
2766
2767#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2768 pgmPoolResetDirtyPages(pVM);
2769#endif
2770
2771 uint16_t idx = pPool->iModifiedHead;
2772 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2773 while (idx != NIL_PGMPOOL_IDX)
2774 {
2775 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2776 idx = pPage->iModifiedNext;
2777 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2778 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2779 pPage->cModifications = 0;
2780 Assert(++cPages);
2781 }
2782 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2783 pPool->cModifiedPages = 0;
2784 pgmUnlock(pVM);
2785}
2786
2787
2788/**
2789 * Handle SyncCR3 pool tasks
2790 *
2791 * @returns VBox status code.
2792 * @retval VINF_SUCCESS if successfully added.
2793 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
2794 * @param pVCpu The cross context virtual CPU structure.
2795 * @remark Should only be used when monitoring is available, thus placed in
2796 * the PGMPOOL_WITH_MONITORING \#ifdef.
2797 */
2798int pgmPoolSyncCR3(PVMCPU pVCpu)
2799{
2800 PVM pVM = pVCpu->CTX_SUFF(pVM);
2801 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2802
2803 /*
2804 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2805 * Occasionally we will have to clear all the shadow page tables because we wanted
2806 * to monitor a page which was mapped by too many shadowed page tables. This operation
2807 * sometimes referred to as a 'lightweight flush'.
2808 */
2809# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2810 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2811 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2812# else /* !IN_RING3 */
2813 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2814 {
2815 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2816 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2817
2818 /* Make sure all other VCPUs return to ring 3. */
2819 if (pVM->cCpus > 1)
2820 {
2821 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2822 PGM_INVL_ALL_VCPU_TLBS(pVM);
2823 }
2824 return VINF_PGM_SYNC_CR3;
2825 }
2826# endif /* !IN_RING3 */
2827 else
2828 {
2829 pgmPoolMonitorModifiedClearAll(pVM);
2830
2831 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2832 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2833 {
2834 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2835 return pgmPoolSyncCR3(pVCpu);
2836 }
2837 }
2838 return VINF_SUCCESS;
2839}
2840
2841
2842/**
2843 * Frees up at least one user entry.
2844 *
2845 * @returns VBox status code.
2846 * @retval VINF_SUCCESS if successfully added.
2847 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2848 * @param pPool The pool.
2849 * @param iUser The user index.
2850 */
2851static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2852{
2853 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2854 /*
2855 * Just free cached pages in a braindead fashion.
2856 */
2857 /** @todo walk the age list backwards and free the first with usage. */
2858 int rc = VINF_SUCCESS;
2859 do
2860 {
2861 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2862 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2863 rc = rc2;
2864 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2865 return rc;
2866}
2867
2868
2869/**
2870 * Inserts a page into the cache.
2871 *
2872 * This will create user node for the page, insert it into the GCPhys
2873 * hash, and insert it into the age list.
2874 *
2875 * @returns VBox status code.
2876 * @retval VINF_SUCCESS if successfully added.
2877 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2878 * @param pPool The pool.
2879 * @param pPage The cached page.
2880 * @param GCPhys The GC physical address of the page we're gonna shadow.
2881 * @param iUser The user index.
2882 * @param iUserTable The user table index.
2883 */
2884DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2885{
2886 int rc = VINF_SUCCESS;
2887 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2888
2889 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2890
2891 if (iUser != NIL_PGMPOOL_IDX)
2892 {
2893#ifdef VBOX_STRICT
2894 /*
2895 * Check that the entry doesn't already exists.
2896 */
2897 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2898 {
2899 uint16_t i = pPage->iUserHead;
2900 do
2901 {
2902 Assert(i < pPool->cMaxUsers);
2903 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2904 i = paUsers[i].iNext;
2905 } while (i != NIL_PGMPOOL_USER_INDEX);
2906 }
2907#endif
2908
2909 /*
2910 * Find free a user node.
2911 */
2912 uint16_t i = pPool->iUserFreeHead;
2913 if (i == NIL_PGMPOOL_USER_INDEX)
2914 {
2915 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2916 if (RT_FAILURE(rc))
2917 return rc;
2918 i = pPool->iUserFreeHead;
2919 }
2920
2921 /*
2922 * Unlink the user node from the free list,
2923 * initialize and insert it into the user list.
2924 */
2925 pPool->iUserFreeHead = paUsers[i].iNext;
2926 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2927 paUsers[i].iUser = iUser;
2928 paUsers[i].iUserTable = iUserTable;
2929 pPage->iUserHead = i;
2930 }
2931 else
2932 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2933
2934
2935 /*
2936 * Insert into cache and enable monitoring of the guest page if enabled.
2937 *
2938 * Until we implement caching of all levels, including the CR3 one, we'll
2939 * have to make sure we don't try monitor & cache any recursive reuse of
2940 * a monitored CR3 page. Because all windows versions are doing this we'll
2941 * have to be able to do combined access monitoring, CR3 + PT and
2942 * PD + PT (guest PAE).
2943 *
2944 * Update:
2945 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2946 */
2947 const bool fCanBeMonitored = true;
2948 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2949 if (fCanBeMonitored)
2950 {
2951 rc = pgmPoolMonitorInsert(pPool, pPage);
2952 AssertRC(rc);
2953 }
2954 return rc;
2955}
2956
2957
2958/**
2959 * Adds a user reference to a page.
2960 *
2961 * This will move the page to the head of the
2962 *
2963 * @returns VBox status code.
2964 * @retval VINF_SUCCESS if successfully added.
2965 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2966 * @param pPool The pool.
2967 * @param pPage The cached page.
2968 * @param iUser The user index.
2969 * @param iUserTable The user table.
2970 */
2971static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2972{
2973 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2974 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2975 Assert(iUser != NIL_PGMPOOL_IDX);
2976
2977# ifdef VBOX_STRICT
2978 /*
2979 * Check that the entry doesn't already exists. We only allow multiple
2980 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2981 */
2982 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2983 {
2984 uint16_t i = pPage->iUserHead;
2985 do
2986 {
2987 Assert(i < pPool->cMaxUsers);
2988 /** @todo this assertion looks odd... Shouldn't it be && here? */
2989 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2990 i = paUsers[i].iNext;
2991 } while (i != NIL_PGMPOOL_USER_INDEX);
2992 }
2993# endif
2994
2995 /*
2996 * Allocate a user node.
2997 */
2998 uint16_t i = pPool->iUserFreeHead;
2999 if (i == NIL_PGMPOOL_USER_INDEX)
3000 {
3001 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3002 if (RT_FAILURE(rc))
3003 return rc;
3004 i = pPool->iUserFreeHead;
3005 }
3006 pPool->iUserFreeHead = paUsers[i].iNext;
3007
3008 /*
3009 * Initialize the user node and insert it.
3010 */
3011 paUsers[i].iNext = pPage->iUserHead;
3012 paUsers[i].iUser = iUser;
3013 paUsers[i].iUserTable = iUserTable;
3014 pPage->iUserHead = i;
3015
3016# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3017 if (pPage->fDirty)
3018 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3019# endif
3020
3021 /*
3022 * Tell the cache to update its replacement stats for this page.
3023 */
3024 pgmPoolCacheUsed(pPool, pPage);
3025 return VINF_SUCCESS;
3026}
3027
3028
3029/**
3030 * Frees a user record associated with a page.
3031 *
3032 * This does not clear the entry in the user table, it simply replaces the
3033 * user record to the chain of free records.
3034 *
3035 * @param pPool The pool.
3036 * @param pPage The shadow page.
3037 * @param iUser The shadow page pool index of the user table.
3038 * @param iUserTable The index into the user table (shadowed).
3039 *
3040 * @remarks Don't call this for root pages.
3041 */
3042static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3043{
3044 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3045 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3046 Assert(iUser != NIL_PGMPOOL_IDX);
3047
3048 /*
3049 * Unlink and free the specified user entry.
3050 */
3051
3052 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3053 uint16_t i = pPage->iUserHead;
3054 if ( i != NIL_PGMPOOL_USER_INDEX
3055 && paUsers[i].iUser == iUser
3056 && paUsers[i].iUserTable == iUserTable)
3057 {
3058 pPage->iUserHead = paUsers[i].iNext;
3059
3060 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3061 paUsers[i].iNext = pPool->iUserFreeHead;
3062 pPool->iUserFreeHead = i;
3063 return;
3064 }
3065
3066 /* General: Linear search. */
3067 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3068 while (i != NIL_PGMPOOL_USER_INDEX)
3069 {
3070 if ( paUsers[i].iUser == iUser
3071 && paUsers[i].iUserTable == iUserTable)
3072 {
3073 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3074 paUsers[iPrev].iNext = paUsers[i].iNext;
3075 else
3076 pPage->iUserHead = paUsers[i].iNext;
3077
3078 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3079 paUsers[i].iNext = pPool->iUserFreeHead;
3080 pPool->iUserFreeHead = i;
3081 return;
3082 }
3083 iPrev = i;
3084 i = paUsers[i].iNext;
3085 }
3086
3087 /* Fatal: didn't find it */
3088 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3089 iUser, iUserTable, pPage->GCPhys));
3090}
3091
3092
3093/**
3094 * Gets the entry size of a shadow table.
3095 *
3096 * @param enmKind The kind of page.
3097 *
3098 * @returns The size of the entry in bytes. That is, 4 or 8.
3099 * @returns If the kind is not for a table, an assertion is raised and 0 is
3100 * returned.
3101 */
3102DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3103{
3104 switch (enmKind)
3105 {
3106 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3107 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3108 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3109 case PGMPOOLKIND_32BIT_PD:
3110 case PGMPOOLKIND_32BIT_PD_PHYS:
3111 return 4;
3112
3113 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3114 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3115 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3116 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3117 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3118 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3119 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3120 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3121 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3122 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3123 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3124 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3125 case PGMPOOLKIND_64BIT_PML4:
3126 case PGMPOOLKIND_PAE_PDPT:
3127 case PGMPOOLKIND_ROOT_NESTED:
3128 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3129 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3130 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3131 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3132 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3133 case PGMPOOLKIND_PAE_PD_PHYS:
3134 case PGMPOOLKIND_PAE_PDPT_PHYS:
3135 return 8;
3136
3137 default:
3138 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3139 }
3140}
3141
3142
3143/**
3144 * Gets the entry size of a guest table.
3145 *
3146 * @param enmKind The kind of page.
3147 *
3148 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3149 * @returns If the kind is not for a table, an assertion is raised and 0 is
3150 * returned.
3151 */
3152DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3153{
3154 switch (enmKind)
3155 {
3156 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3157 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3158 case PGMPOOLKIND_32BIT_PD:
3159 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3160 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3161 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3162 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3163 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3164 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3165 return 4;
3166
3167 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3168 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3169 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3170 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3171 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3172 case PGMPOOLKIND_64BIT_PML4:
3173 case PGMPOOLKIND_PAE_PDPT:
3174 return 8;
3175
3176 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3177 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3178 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3179 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3180 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3181 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3182 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3183 case PGMPOOLKIND_ROOT_NESTED:
3184 case PGMPOOLKIND_PAE_PD_PHYS:
3185 case PGMPOOLKIND_PAE_PDPT_PHYS:
3186 case PGMPOOLKIND_32BIT_PD_PHYS:
3187 /** @todo can we return 0? (nobody is calling this...) */
3188 AssertFailed();
3189 return 0;
3190
3191 default:
3192 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3193 }
3194}
3195
3196
3197/**
3198 * Checks one shadow page table entry for a mapping of a physical page.
3199 *
3200 * @returns true / false indicating removal of all relevant PTEs
3201 *
3202 * @param pVM The cross context VM structure.
3203 * @param pPhysPage The guest page in question.
3204 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3205 * @param iShw The shadow page table.
3206 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3207 */
3208static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3209{
3210 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3211 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3212 bool fRet = false;
3213
3214 /*
3215 * Assert sanity.
3216 */
3217 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3218 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3219 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3220
3221 /*
3222 * Then, clear the actual mappings to the page in the shadow PT.
3223 */
3224 switch (pPage->enmKind)
3225 {
3226 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3227 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3228 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3229 {
3230 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3231 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3232 uint32_t u32AndMask = 0;
3233 uint32_t u32OrMask = 0;
3234
3235 if (!fFlushPTEs)
3236 {
3237 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3238 {
3239 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3240 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3241 u32OrMask = X86_PTE_RW;
3242 u32AndMask = UINT32_MAX;
3243 fRet = true;
3244 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3245 break;
3246
3247 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3248 u32OrMask = 0;
3249 u32AndMask = ~X86_PTE_RW;
3250 fRet = true;
3251 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3252 break;
3253 default:
3254 /* (shouldn't be here, will assert below) */
3255 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3256 break;
3257 }
3258 }
3259 else
3260 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3261
3262 /* Update the counter if we're removing references. */
3263 if (!u32AndMask)
3264 {
3265 Assert(pPage->cPresent);
3266 Assert(pPool->cPresent);
3267 pPage->cPresent--;
3268 pPool->cPresent--;
3269 }
3270
3271 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3272 {
3273 X86PTE Pte;
3274
3275 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3276 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3277 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3278 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3279
3280 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3281 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3282 return fRet;
3283 }
3284#ifdef LOG_ENABLED
3285 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3286 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3287 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3288 {
3289 Log(("i=%d cFound=%d\n", i, ++cFound));
3290 }
3291#endif
3292 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3293 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3294 break;
3295 }
3296
3297 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3298 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3299 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3300 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3301 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3302 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3303 {
3304 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3305 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3306 uint64_t u64OrMask = 0;
3307 uint64_t u64AndMask = 0;
3308
3309 if (!fFlushPTEs)
3310 {
3311 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3312 {
3313 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3314 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3315 u64OrMask = X86_PTE_RW;
3316 u64AndMask = UINT64_MAX;
3317 fRet = true;
3318 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3319 break;
3320
3321 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3322 u64OrMask = 0;
3323 u64AndMask = ~(uint64_t)X86_PTE_RW;
3324 fRet = true;
3325 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3326 break;
3327
3328 default:
3329 /* (shouldn't be here, will assert below) */
3330 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3331 break;
3332 }
3333 }
3334 else
3335 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3336
3337 /* Update the counter if we're removing references. */
3338 if (!u64AndMask)
3339 {
3340 Assert(pPage->cPresent);
3341 Assert(pPool->cPresent);
3342 pPage->cPresent--;
3343 pPool->cPresent--;
3344 }
3345
3346 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3347 {
3348 X86PTEPAE Pte;
3349
3350 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3351 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3352 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3353 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3354
3355 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3356 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3357 return fRet;
3358 }
3359#ifdef LOG_ENABLED
3360 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3361 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3362 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3363 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3364 Log(("i=%d cFound=%d\n", i, ++cFound));
3365#endif
3366 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3367 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3368 break;
3369 }
3370
3371#ifdef PGM_WITH_LARGE_PAGES
3372 /* Large page case only. */
3373 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3374 {
3375 Assert(pVM->pgm.s.fNestedPaging);
3376
3377 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3378 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3379
3380 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3381 {
3382 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3383 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3384 pPD->a[iPte].u = 0;
3385 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3386
3387 /* Update the counter as we're removing references. */
3388 Assert(pPage->cPresent);
3389 Assert(pPool->cPresent);
3390 pPage->cPresent--;
3391 pPool->cPresent--;
3392
3393 return fRet;
3394 }
3395# ifdef LOG_ENABLED
3396 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3397 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3398 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3399 Log(("i=%d cFound=%d\n", i, ++cFound));
3400# endif
3401 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3402 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3403 break;
3404 }
3405
3406 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3407 case PGMPOOLKIND_PAE_PD_PHYS:
3408 {
3409 Assert(pVM->pgm.s.fNestedPaging);
3410
3411 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3412 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3413
3414 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3415 {
3416 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3417 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3418 pPD->a[iPte].u = 0;
3419 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3420
3421 /* Update the counter as we're removing references. */
3422 Assert(pPage->cPresent);
3423 Assert(pPool->cPresent);
3424 pPage->cPresent--;
3425 pPool->cPresent--;
3426 return fRet;
3427 }
3428# ifdef LOG_ENABLED
3429 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3430 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3431 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3432 Log(("i=%d cFound=%d\n", i, ++cFound));
3433# endif
3434 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3435 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3436 break;
3437 }
3438#endif /* PGM_WITH_LARGE_PAGES */
3439
3440 default:
3441 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3442 }
3443
3444 /* not reached. */
3445#ifndef _MSC_VER
3446 return fRet;
3447#endif
3448}
3449
3450
3451/**
3452 * Scans one shadow page table for mappings of a physical page.
3453 *
3454 * @param pVM The cross context VM structure.
3455 * @param pPhysPage The guest page in question.
3456 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3457 * @param iShw The shadow page table.
3458 */
3459static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3460{
3461 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3462
3463 /* We should only come here with when there's only one reference to this physical page. */
3464 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3465
3466 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3467 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3468 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3469 if (!fKeptPTEs)
3470 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3471 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3472}
3473
3474
3475/**
3476 * Flushes a list of shadow page tables mapping the same physical page.
3477 *
3478 * @param pVM The cross context VM structure.
3479 * @param pPhysPage The guest page in question.
3480 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3481 * @param iPhysExt The physical cross reference extent list to flush.
3482 */
3483static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3484{
3485 PGM_LOCK_ASSERT_OWNER(pVM);
3486 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3487 bool fKeepList = false;
3488
3489 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3490 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3491
3492 const uint16_t iPhysExtStart = iPhysExt;
3493 PPGMPOOLPHYSEXT pPhysExt;
3494 do
3495 {
3496 Assert(iPhysExt < pPool->cMaxPhysExts);
3497 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3498 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3499 {
3500 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3501 {
3502 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3503 if (!fKeptPTEs)
3504 {
3505 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3506 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3507 }
3508 else
3509 fKeepList = true;
3510 }
3511 }
3512 /* next */
3513 iPhysExt = pPhysExt->iNext;
3514 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3515
3516 if (!fKeepList)
3517 {
3518 /* insert the list into the free list and clear the ram range entry. */
3519 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3520 pPool->iPhysExtFreeHead = iPhysExtStart;
3521 /* Invalidate the tracking data. */
3522 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3523 }
3524
3525 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3526}
3527
3528
3529/**
3530 * Flushes all shadow page table mappings of the given guest page.
3531 *
3532 * This is typically called when the host page backing the guest one has been
3533 * replaced or when the page protection was changed due to a guest access
3534 * caught by the monitoring.
3535 *
3536 * @returns VBox status code.
3537 * @retval VINF_SUCCESS if all references has been successfully cleared.
3538 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3539 * pool cleaning. FF and sync flags are set.
3540 *
3541 * @param pVM The cross context VM structure.
3542 * @param GCPhysPage GC physical address of the page in question
3543 * @param pPhysPage The guest page in question.
3544 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3545 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3546 * flushed, it is NOT touched if this isn't necessary.
3547 * The caller MUST initialized this to @a false.
3548 */
3549int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3550{
3551 PVMCPU pVCpu = VMMGetCpu(pVM);
3552 pgmLock(pVM);
3553 int rc = VINF_SUCCESS;
3554
3555#ifdef PGM_WITH_LARGE_PAGES
3556 /* Is this page part of a large page? */
3557 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3558 {
3559 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3560 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3561
3562 /* Fetch the large page base. */
3563 PPGMPAGE pLargePage;
3564 if (GCPhysBase != GCPhysPage)
3565 {
3566 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3567 AssertFatal(pLargePage);
3568 }
3569 else
3570 pLargePage = pPhysPage;
3571
3572 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3573
3574 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3575 {
3576 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3577 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3578 pVM->pgm.s.cLargePagesDisabled++;
3579
3580 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3581 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3582
3583 *pfFlushTLBs = true;
3584 pgmUnlock(pVM);
3585 return rc;
3586 }
3587 }
3588#else
3589 NOREF(GCPhysPage);
3590#endif /* PGM_WITH_LARGE_PAGES */
3591
3592 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3593 if (u16)
3594 {
3595 /*
3596 * The zero page is currently screwing up the tracking and we'll
3597 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3598 * is defined, zero pages won't normally be mapped. Some kind of solution
3599 * will be needed for this problem of course, but it will have to wait...
3600 */
3601 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3602 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3603 rc = VINF_PGM_GCPHYS_ALIASED;
3604 else
3605 {
3606# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3607 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3608 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3609 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3610# endif
3611
3612 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3613 {
3614 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3615 pgmPoolTrackFlushGCPhysPT(pVM,
3616 pPhysPage,
3617 fFlushPTEs,
3618 PGMPOOL_TD_GET_IDX(u16));
3619 }
3620 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3621 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3622 else
3623 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3624 *pfFlushTLBs = true;
3625
3626# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3627 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3628# endif
3629 }
3630 }
3631
3632 if (rc == VINF_PGM_GCPHYS_ALIASED)
3633 {
3634 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3635 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3636 rc = VINF_PGM_SYNC_CR3;
3637 }
3638 pgmUnlock(pVM);
3639 return rc;
3640}
3641
3642
3643/**
3644 * Scans all shadow page tables for mappings of a physical page.
3645 *
3646 * This may be slow, but it's most likely more efficient than cleaning
3647 * out the entire page pool / cache.
3648 *
3649 * @returns VBox status code.
3650 * @retval VINF_SUCCESS if all references has been successfully cleared.
3651 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3652 * a page pool cleaning.
3653 *
3654 * @param pVM The cross context VM structure.
3655 * @param pPhysPage The guest page in question.
3656 */
3657int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3658{
3659 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3660 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3661 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3662 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3663
3664 /*
3665 * There is a limit to what makes sense.
3666 */
3667 if ( pPool->cPresent > 1024
3668 && pVM->cCpus == 1)
3669 {
3670 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3671 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3672 return VINF_PGM_GCPHYS_ALIASED;
3673 }
3674
3675 /*
3676 * Iterate all the pages until we've encountered all that in use.
3677 * This is simple but not quite optimal solution.
3678 */
3679 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3680 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3681 unsigned cLeft = pPool->cUsedPages;
3682 unsigned iPage = pPool->cCurPages;
3683 while (--iPage >= PGMPOOL_IDX_FIRST)
3684 {
3685 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3686 if ( pPage->GCPhys != NIL_RTGCPHYS
3687 && pPage->cPresent)
3688 {
3689 switch (pPage->enmKind)
3690 {
3691 /*
3692 * We only care about shadow page tables.
3693 */
3694 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3695 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3696 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3697 {
3698 unsigned cPresent = pPage->cPresent;
3699 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3700 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3701 if (pPT->a[i].n.u1Present)
3702 {
3703 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3704 {
3705 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3706 pPT->a[i].u = 0;
3707
3708 /* Update the counter as we're removing references. */
3709 Assert(pPage->cPresent);
3710 Assert(pPool->cPresent);
3711 pPage->cPresent--;
3712 pPool->cPresent--;
3713 }
3714 if (!--cPresent)
3715 break;
3716 }
3717 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3718 break;
3719 }
3720
3721 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3722 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3723 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3724 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3725 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3726 {
3727 unsigned cPresent = pPage->cPresent;
3728 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3729 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3730 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3731 {
3732 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3733 {
3734 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3735 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3736
3737 /* Update the counter as we're removing references. */
3738 Assert(pPage->cPresent);
3739 Assert(pPool->cPresent);
3740 pPage->cPresent--;
3741 pPool->cPresent--;
3742 }
3743 if (!--cPresent)
3744 break;
3745 }
3746 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3747 break;
3748 }
3749#ifndef IN_RC
3750 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3751 {
3752 unsigned cPresent = pPage->cPresent;
3753 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3754 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3755 if (pPT->a[i].n.u1Present)
3756 {
3757 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3758 {
3759 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3760 pPT->a[i].u = 0;
3761
3762 /* Update the counter as we're removing references. */
3763 Assert(pPage->cPresent);
3764 Assert(pPool->cPresent);
3765 pPage->cPresent--;
3766 pPool->cPresent--;
3767 }
3768 if (!--cPresent)
3769 break;
3770 }
3771 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3772 break;
3773 }
3774#endif
3775 }
3776 if (!--cLeft)
3777 break;
3778 }
3779 }
3780
3781 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3782 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3783
3784 /*
3785 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3786 */
3787 if (pPool->cPresent > 1024)
3788 {
3789 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3790 return VINF_PGM_GCPHYS_ALIASED;
3791 }
3792
3793 return VINF_SUCCESS;
3794}
3795
3796
3797/**
3798 * Clears the user entry in a user table.
3799 *
3800 * This is used to remove all references to a page when flushing it.
3801 */
3802static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3803{
3804 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3805 Assert(pUser->iUser < pPool->cCurPages);
3806 uint32_t iUserTable = pUser->iUserTable;
3807
3808 /*
3809 * Map the user page. Ignore references made by fictitious pages.
3810 */
3811 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3812 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3813 union
3814 {
3815 uint64_t *pau64;
3816 uint32_t *pau32;
3817 } u;
3818 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3819 {
3820 Assert(!pUserPage->pvPageR3);
3821 return;
3822 }
3823 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3824
3825
3826 /* Safety precaution in case we change the paging for other modes too in the future. */
3827 Assert(!pgmPoolIsPageLocked(pPage));
3828
3829#ifdef VBOX_STRICT
3830 /*
3831 * Some sanity checks.
3832 */
3833 switch (pUserPage->enmKind)
3834 {
3835 case PGMPOOLKIND_32BIT_PD:
3836 case PGMPOOLKIND_32BIT_PD_PHYS:
3837 Assert(iUserTable < X86_PG_ENTRIES);
3838 break;
3839 case PGMPOOLKIND_PAE_PDPT:
3840 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3841 case PGMPOOLKIND_PAE_PDPT_PHYS:
3842 Assert(iUserTable < 4);
3843 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3844 break;
3845 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3846 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3847 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3848 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3849 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3850 case PGMPOOLKIND_PAE_PD_PHYS:
3851 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3852 break;
3853 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3854 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3855 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3856 break;
3857 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3858 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3859 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3860 break;
3861 case PGMPOOLKIND_64BIT_PML4:
3862 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3863 /* GCPhys >> PAGE_SHIFT is the index here */
3864 break;
3865 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3866 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3867 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3868 break;
3869
3870 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3871 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3872 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3873 break;
3874
3875 case PGMPOOLKIND_ROOT_NESTED:
3876 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3877 break;
3878
3879 default:
3880 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3881 break;
3882 }
3883#endif /* VBOX_STRICT */
3884
3885 /*
3886 * Clear the entry in the user page.
3887 */
3888 switch (pUserPage->enmKind)
3889 {
3890 /* 32-bit entries */
3891 case PGMPOOLKIND_32BIT_PD:
3892 case PGMPOOLKIND_32BIT_PD_PHYS:
3893 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3894 break;
3895
3896 /* 64-bit entries */
3897 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3898 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3899 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3900 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3901 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3902#ifdef IN_RC
3903 /*
3904 * In 32 bits PAE mode we *must* invalidate the TLB when changing a
3905 * PDPT entry; the CPU fetches them only during cr3 load, so any
3906 * non-present PDPT will continue to cause page faults.
3907 */
3908 ASMReloadCR3();
3909 /* no break */
3910#endif
3911 case PGMPOOLKIND_PAE_PD_PHYS:
3912 case PGMPOOLKIND_PAE_PDPT_PHYS:
3913 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3914 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3915 case PGMPOOLKIND_64BIT_PML4:
3916 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3917 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3918 case PGMPOOLKIND_PAE_PDPT:
3919 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3920 case PGMPOOLKIND_ROOT_NESTED:
3921 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3922 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3923 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3924 break;
3925
3926 default:
3927 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3928 }
3929 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3930}
3931
3932
3933/**
3934 * Clears all users of a page.
3935 */
3936static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3937{
3938 /*
3939 * Free all the user records.
3940 */
3941 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3942
3943 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3944 uint16_t i = pPage->iUserHead;
3945 while (i != NIL_PGMPOOL_USER_INDEX)
3946 {
3947 /* Clear enter in user table. */
3948 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3949
3950 /* Free it. */
3951 const uint16_t iNext = paUsers[i].iNext;
3952 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3953 paUsers[i].iNext = pPool->iUserFreeHead;
3954 pPool->iUserFreeHead = i;
3955
3956 /* Next. */
3957 i = iNext;
3958 }
3959 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3960}
3961
3962
3963/**
3964 * Allocates a new physical cross reference extent.
3965 *
3966 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3967 * @param pVM The cross context VM structure.
3968 * @param piPhysExt Where to store the phys ext index.
3969 */
3970PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3971{
3972 PGM_LOCK_ASSERT_OWNER(pVM);
3973 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3974 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3975 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3976 {
3977 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3978 return NULL;
3979 }
3980 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3981 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3982 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3983 *piPhysExt = iPhysExt;
3984 return pPhysExt;
3985}
3986
3987
3988/**
3989 * Frees a physical cross reference extent.
3990 *
3991 * @param pVM The cross context VM structure.
3992 * @param iPhysExt The extent to free.
3993 */
3994void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3995{
3996 PGM_LOCK_ASSERT_OWNER(pVM);
3997 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3998 Assert(iPhysExt < pPool->cMaxPhysExts);
3999 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4000 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4001 {
4002 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4003 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4004 }
4005 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4006 pPool->iPhysExtFreeHead = iPhysExt;
4007}
4008
4009
4010/**
4011 * Frees a physical cross reference extent.
4012 *
4013 * @param pVM The cross context VM structure.
4014 * @param iPhysExt The extent to free.
4015 */
4016void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
4017{
4018 PGM_LOCK_ASSERT_OWNER(pVM);
4019 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4020
4021 const uint16_t iPhysExtStart = iPhysExt;
4022 PPGMPOOLPHYSEXT pPhysExt;
4023 do
4024 {
4025 Assert(iPhysExt < pPool->cMaxPhysExts);
4026 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4027 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4028 {
4029 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4030 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4031 }
4032
4033 /* next */
4034 iPhysExt = pPhysExt->iNext;
4035 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4036
4037 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4038 pPool->iPhysExtFreeHead = iPhysExtStart;
4039}
4040
4041
4042/**
4043 * Insert a reference into a list of physical cross reference extents.
4044 *
4045 * @returns The new tracking data for PGMPAGE.
4046 *
4047 * @param pVM The cross context VM structure.
4048 * @param iPhysExt The physical extent index of the list head.
4049 * @param iShwPT The shadow page table index.
4050 * @param iPte Page table entry
4051 *
4052 */
4053static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4054{
4055 PGM_LOCK_ASSERT_OWNER(pVM);
4056 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4057 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4058
4059 /*
4060 * Special common cases.
4061 */
4062 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4063 {
4064 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4065 paPhysExts[iPhysExt].apte[1] = iPte;
4066 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4067 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4068 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4069 }
4070 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4071 {
4072 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4073 paPhysExts[iPhysExt].apte[2] = iPte;
4074 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4075 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4076 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4077 }
4078 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4079
4080 /*
4081 * General treatment.
4082 */
4083 const uint16_t iPhysExtStart = iPhysExt;
4084 unsigned cMax = 15;
4085 for (;;)
4086 {
4087 Assert(iPhysExt < pPool->cMaxPhysExts);
4088 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4089 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4090 {
4091 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4092 paPhysExts[iPhysExt].apte[i] = iPte;
4093 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4094 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4095 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4096 }
4097 if (!--cMax)
4098 {
4099 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4100 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4101 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4102 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4103 }
4104
4105 /* advance */
4106 iPhysExt = paPhysExts[iPhysExt].iNext;
4107 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4108 break;
4109 }
4110
4111 /*
4112 * Add another extent to the list.
4113 */
4114 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4115 if (!pNew)
4116 {
4117 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4118 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4119 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4120 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4121 }
4122 pNew->iNext = iPhysExtStart;
4123 pNew->aidx[0] = iShwPT;
4124 pNew->apte[0] = iPte;
4125 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4126 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4127}
4128
4129
4130/**
4131 * Add a reference to guest physical page where extents are in use.
4132 *
4133 * @returns The new tracking data for PGMPAGE.
4134 *
4135 * @param pVM The cross context VM structure.
4136 * @param pPhysPage Pointer to the aPages entry in the ram range.
4137 * @param u16 The ram range flags (top 16-bits).
4138 * @param iShwPT The shadow page table index.
4139 * @param iPte Page table entry
4140 */
4141uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4142{
4143 pgmLock(pVM);
4144 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4145 {
4146 /*
4147 * Convert to extent list.
4148 */
4149 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4150 uint16_t iPhysExt;
4151 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4152 if (pPhysExt)
4153 {
4154 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4155 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4156 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4157 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4158 pPhysExt->aidx[1] = iShwPT;
4159 pPhysExt->apte[1] = iPte;
4160 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4161 }
4162 else
4163 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4164 }
4165 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4166 {
4167 /*
4168 * Insert into the extent list.
4169 */
4170 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4171 }
4172 else
4173 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4174 pgmUnlock(pVM);
4175 return u16;
4176}
4177
4178
4179/**
4180 * Clear references to guest physical memory.
4181 *
4182 * @param pPool The pool.
4183 * @param pPage The page.
4184 * @param pPhysPage Pointer to the aPages entry in the ram range.
4185 * @param iPte Shadow PTE index
4186 */
4187void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4188{
4189 PVM pVM = pPool->CTX_SUFF(pVM);
4190 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4191 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4192
4193 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4194 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4195 {
4196 pgmLock(pVM);
4197
4198 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4199 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4200 do
4201 {
4202 Assert(iPhysExt < pPool->cMaxPhysExts);
4203
4204 /*
4205 * Look for the shadow page and check if it's all freed.
4206 */
4207 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4208 {
4209 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4210 && paPhysExts[iPhysExt].apte[i] == iPte)
4211 {
4212 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4213 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4214
4215 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4216 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4217 {
4218 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4219 pgmUnlock(pVM);
4220 return;
4221 }
4222
4223 /* we can free the node. */
4224 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4225 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4226 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4227 {
4228 /* lonely node */
4229 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4230 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4231 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4232 }
4233 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4234 {
4235 /* head */
4236 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4237 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4238 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4239 }
4240 else
4241 {
4242 /* in list */
4243 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4244 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4245 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4246 }
4247 iPhysExt = iPhysExtNext;
4248 pgmUnlock(pVM);
4249 return;
4250 }
4251 }
4252
4253 /* next */
4254 iPhysExtPrev = iPhysExt;
4255 iPhysExt = paPhysExts[iPhysExt].iNext;
4256 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4257
4258 pgmUnlock(pVM);
4259 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4260 }
4261 else /* nothing to do */
4262 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4263}
4264
4265/**
4266 * Clear references to guest physical memory.
4267 *
4268 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4269 * physical address is assumed to be correct, so the linear search can be
4270 * skipped and we can assert at an earlier point.
4271 *
4272 * @param pPool The pool.
4273 * @param pPage The page.
4274 * @param HCPhys The host physical address corresponding to the guest page.
4275 * @param GCPhys The guest physical address corresponding to HCPhys.
4276 * @param iPte Shadow PTE index
4277 */
4278static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4279{
4280 /*
4281 * Lookup the page and check if it checks out before derefing it.
4282 */
4283 PVM pVM = pPool->CTX_SUFF(pVM);
4284 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4285 if (pPhysPage)
4286 {
4287 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4288#ifdef LOG_ENABLED
4289 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4290 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4291#endif
4292 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4293 {
4294 Assert(pPage->cPresent);
4295 Assert(pPool->cPresent);
4296 pPage->cPresent--;
4297 pPool->cPresent--;
4298 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4299 return;
4300 }
4301
4302 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4303 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4304 }
4305 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4306}
4307
4308
4309/**
4310 * Clear references to guest physical memory.
4311 *
4312 * @param pPool The pool.
4313 * @param pPage The page.
4314 * @param HCPhys The host physical address corresponding to the guest page.
4315 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
4316 * @param iPte Shadow pte index
4317 */
4318void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4319{
4320 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4321
4322 /*
4323 * Try the hint first.
4324 */
4325 RTHCPHYS HCPhysHinted;
4326 PVM pVM = pPool->CTX_SUFF(pVM);
4327 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4328 if (pPhysPage)
4329 {
4330 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4331 Assert(HCPhysHinted);
4332 if (HCPhysHinted == HCPhys)
4333 {
4334 Assert(pPage->cPresent);
4335 Assert(pPool->cPresent);
4336 pPage->cPresent--;
4337 pPool->cPresent--;
4338 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4339 return;
4340 }
4341 }
4342 else
4343 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4344
4345 /*
4346 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4347 */
4348 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4349 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4350 while (pRam)
4351 {
4352 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4353 while (iPage-- > 0)
4354 {
4355 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4356 {
4357 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4358 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4359 Assert(pPage->cPresent);
4360 Assert(pPool->cPresent);
4361 pPage->cPresent--;
4362 pPool->cPresent--;
4363 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4364 return;
4365 }
4366 }
4367 pRam = pRam->CTX_SUFF(pNext);
4368 }
4369
4370 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4371}
4372
4373
4374/**
4375 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4376 *
4377 * @param pPool The pool.
4378 * @param pPage The page.
4379 * @param pShwPT The shadow page table (mapping of the page).
4380 * @param pGstPT The guest page table.
4381 */
4382DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4383{
4384 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4385 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4386 {
4387 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4388 if (pShwPT->a[i].n.u1Present)
4389 {
4390 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4391 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4392 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4393 if (!pPage->cPresent)
4394 break;
4395 }
4396 }
4397}
4398
4399
4400/**
4401 * Clear references to guest physical memory in a PAE / 32-bit page table.
4402 *
4403 * @param pPool The pool.
4404 * @param pPage The page.
4405 * @param pShwPT The shadow page table (mapping of the page).
4406 * @param pGstPT The guest page table (just a half one).
4407 */
4408DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4409{
4410 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4411 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4412 {
4413 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4414 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4415 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4416 {
4417 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4418 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4419 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4420 if (!pPage->cPresent)
4421 break;
4422 }
4423 }
4424}
4425
4426
4427/**
4428 * Clear references to guest physical memory in a PAE / PAE page table.
4429 *
4430 * @param pPool The pool.
4431 * @param pPage The page.
4432 * @param pShwPT The shadow page table (mapping of the page).
4433 * @param pGstPT The guest page table.
4434 */
4435DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4436{
4437 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4438 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4439 {
4440 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4441 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4442 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4443 {
4444 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4445 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4446 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4447 if (!pPage->cPresent)
4448 break;
4449 }
4450 }
4451}
4452
4453
4454/**
4455 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4456 *
4457 * @param pPool The pool.
4458 * @param pPage The page.
4459 * @param pShwPT The shadow page table (mapping of the page).
4460 */
4461DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4462{
4463 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4464 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4465 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4466 {
4467 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4468 if (pShwPT->a[i].n.u1Present)
4469 {
4470 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4471 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4472 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4473 if (!pPage->cPresent)
4474 break;
4475 }
4476 }
4477}
4478
4479
4480/**
4481 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4482 *
4483 * @param pPool The pool.
4484 * @param pPage The page.
4485 * @param pShwPT The shadow page table (mapping of the page).
4486 */
4487DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4488{
4489 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4490 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4491 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4492 {
4493 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4494 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4495 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4496 {
4497 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4498 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4499 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4500 if (!pPage->cPresent)
4501 break;
4502 }
4503 }
4504}
4505
4506
4507/**
4508 * Clear references to shadowed pages in an EPT page table.
4509 *
4510 * @param pPool The pool.
4511 * @param pPage The page.
4512 * @param pShwPT The shadow page directory pointer table (mapping of the
4513 * page).
4514 */
4515DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4516{
4517 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4518 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4519 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4520 {
4521 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4522 if (pShwPT->a[i].n.u1Present)
4523 {
4524 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4525 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4526 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4527 if (!pPage->cPresent)
4528 break;
4529 }
4530 }
4531}
4532
4533
4534/**
4535 * Clear references to shadowed pages in a 32 bits page directory.
4536 *
4537 * @param pPool The pool.
4538 * @param pPage The page.
4539 * @param pShwPD The shadow page directory (mapping of the page).
4540 */
4541DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4542{
4543 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4544 {
4545 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4546 if ( pShwPD->a[i].n.u1Present
4547 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4548 )
4549 {
4550 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4551 if (pSubPage)
4552 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4553 else
4554 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4555 }
4556 }
4557}
4558
4559
4560/**
4561 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4562 *
4563 * @param pPool The pool.
4564 * @param pPage The page.
4565 * @param pShwPD The shadow page directory (mapping of the page).
4566 */
4567DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4568{
4569 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4570 {
4571 if ( pShwPD->a[i].n.u1Present
4572 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4573 {
4574#ifdef PGM_WITH_LARGE_PAGES
4575 if (pShwPD->a[i].b.u1Size)
4576 {
4577 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4578 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4579 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4580 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4581 i);
4582 }
4583 else
4584#endif
4585 {
4586 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4587 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4588 if (pSubPage)
4589 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4590 else
4591 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4592 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4593 }
4594 }
4595 }
4596}
4597
4598
4599/**
4600 * Clear references to shadowed pages in a PAE page directory pointer table.
4601 *
4602 * @param pPool The pool.
4603 * @param pPage The page.
4604 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4605 */
4606DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4607{
4608 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4609 {
4610 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4611 if ( pShwPDPT->a[i].n.u1Present
4612 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4613 )
4614 {
4615 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4616 if (pSubPage)
4617 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4618 else
4619 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4620 }
4621 }
4622}
4623
4624
4625/**
4626 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4627 *
4628 * @param pPool The pool.
4629 * @param pPage The page.
4630 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4631 */
4632DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4633{
4634 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4635 {
4636 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4637 if (pShwPDPT->a[i].n.u1Present)
4638 {
4639 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4640 if (pSubPage)
4641 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4642 else
4643 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4644 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4645 }
4646 }
4647}
4648
4649
4650/**
4651 * Clear references to shadowed pages in a 64-bit level 4 page table.
4652 *
4653 * @param pPool The pool.
4654 * @param pPage The page.
4655 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4656 */
4657DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4658{
4659 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4660 {
4661 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4662 if (pShwPML4->a[i].n.u1Present)
4663 {
4664 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4665 if (pSubPage)
4666 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4667 else
4668 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4669 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4670 }
4671 }
4672}
4673
4674
4675/**
4676 * Clear references to shadowed pages in an EPT page directory.
4677 *
4678 * @param pPool The pool.
4679 * @param pPage The page.
4680 * @param pShwPD The shadow page directory (mapping of the page).
4681 */
4682DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4683{
4684 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4685 {
4686 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4687 if (pShwPD->a[i].n.u1Present)
4688 {
4689#ifdef PGM_WITH_LARGE_PAGES
4690 if (pShwPD->a[i].b.u1Size)
4691 {
4692 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4693 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4694 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4695 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4696 i);
4697 }
4698 else
4699#endif
4700 {
4701 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4702 if (pSubPage)
4703 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4704 else
4705 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4706 }
4707 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4708 }
4709 }
4710}
4711
4712
4713/**
4714 * Clear references to shadowed pages in an EPT page directory pointer table.
4715 *
4716 * @param pPool The pool.
4717 * @param pPage The page.
4718 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4719 */
4720DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4721{
4722 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4723 {
4724 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4725 if (pShwPDPT->a[i].n.u1Present)
4726 {
4727 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4728 if (pSubPage)
4729 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4730 else
4731 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4732 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4733 }
4734 }
4735}
4736
4737
4738/**
4739 * Clears all references made by this page.
4740 *
4741 * This includes other shadow pages and GC physical addresses.
4742 *
4743 * @param pPool The pool.
4744 * @param pPage The page.
4745 */
4746static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4747{
4748 /*
4749 * Map the shadow page and take action according to the page kind.
4750 */
4751 PVM pVM = pPool->CTX_SUFF(pVM);
4752 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4753 switch (pPage->enmKind)
4754 {
4755 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4756 {
4757 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4758 void *pvGst;
4759 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4760 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4761 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4762 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4763 break;
4764 }
4765
4766 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4767 {
4768 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4769 void *pvGst;
4770 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4771 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4772 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4773 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4774 break;
4775 }
4776
4777 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4778 {
4779 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4780 void *pvGst;
4781 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4782 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4783 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4784 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4785 break;
4786 }
4787
4788 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4789 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4790 {
4791 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4792 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4793 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4794 break;
4795 }
4796
4797 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4798 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4799 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4800 {
4801 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4802 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4803 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4804 break;
4805 }
4806
4807 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4808 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4809 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4810 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4811 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4812 case PGMPOOLKIND_PAE_PD_PHYS:
4813 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4814 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4815 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4816 break;
4817
4818 case PGMPOOLKIND_32BIT_PD_PHYS:
4819 case PGMPOOLKIND_32BIT_PD:
4820 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4821 break;
4822
4823 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4824 case PGMPOOLKIND_PAE_PDPT:
4825 case PGMPOOLKIND_PAE_PDPT_PHYS:
4826 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4827 break;
4828
4829 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4830 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4831 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4832 break;
4833
4834 case PGMPOOLKIND_64BIT_PML4:
4835 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4836 break;
4837
4838 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4839 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4840 break;
4841
4842 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4843 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4844 break;
4845
4846 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4847 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4848 break;
4849
4850 default:
4851 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4852 }
4853
4854 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
4855 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4856 ASMMemZeroPage(pvShw);
4857 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4858 pPage->fZeroed = true;
4859 Assert(!pPage->cPresent);
4860 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4861}
4862
4863
4864/**
4865 * Flushes a pool page.
4866 *
4867 * This moves the page to the free list after removing all user references to it.
4868 *
4869 * @returns VBox status code.
4870 * @retval VINF_SUCCESS on success.
4871 * @param pPool The pool.
4872 * @param pPage The shadow page.
4873 * @param fFlush Flush the TLBS when required (should only be false in very specific use cases!!)
4874 */
4875int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4876{
4877 PVM pVM = pPool->CTX_SUFF(pVM);
4878 bool fFlushRequired = false;
4879
4880 int rc = VINF_SUCCESS;
4881 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4882 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4883 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4884
4885 /*
4886 * Reject any attempts at flushing any of the special root pages (shall
4887 * not happen).
4888 */
4889 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4890 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4891 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4892 VINF_SUCCESS);
4893
4894 pgmLock(pVM);
4895
4896 /*
4897 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4898 */
4899 if (pgmPoolIsPageLocked(pPage))
4900 {
4901 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4902 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4903 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4904 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4905 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4906 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4907 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4908 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4909 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4910 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4911 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4912 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4913 pgmUnlock(pVM);
4914 return VINF_SUCCESS;
4915 }
4916
4917#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4918 /* Start a subset so we won't run out of mapping space. */
4919 PVMCPU pVCpu = VMMGetCpu(pVM);
4920 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4921#endif
4922
4923 /*
4924 * Mark the page as being in need of an ASMMemZeroPage().
4925 */
4926 pPage->fZeroed = false;
4927
4928#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4929 if (pPage->fDirty)
4930 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4931#endif
4932
4933 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4934 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4935 fFlushRequired = true;
4936
4937 /*
4938 * Clear the page.
4939 */
4940 pgmPoolTrackClearPageUsers(pPool, pPage);
4941 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4942 pgmPoolTrackDeref(pPool, pPage);
4943 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4944
4945 /*
4946 * Flush it from the cache.
4947 */
4948 pgmPoolCacheFlushPage(pPool, pPage);
4949
4950#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4951 /* Heavy stuff done. */
4952 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4953#endif
4954
4955 /*
4956 * Deregistering the monitoring.
4957 */
4958 if (pPage->fMonitored)
4959 rc = pgmPoolMonitorFlush(pPool, pPage);
4960
4961 /*
4962 * Free the page.
4963 */
4964 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4965 pPage->iNext = pPool->iFreeHead;
4966 pPool->iFreeHead = pPage->idx;
4967 pPage->enmKind = PGMPOOLKIND_FREE;
4968 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4969 pPage->GCPhys = NIL_RTGCPHYS;
4970 pPage->fReusedFlushPending = false;
4971
4972 pPool->cUsedPages--;
4973
4974 /* Flush the TLBs of all VCPUs if required. */
4975 if ( fFlushRequired
4976 && fFlush)
4977 {
4978 PGM_INVL_ALL_VCPU_TLBS(pVM);
4979 }
4980
4981 pgmUnlock(pVM);
4982 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4983 return rc;
4984}
4985
4986
4987/**
4988 * Frees a usage of a pool page.
4989 *
4990 * The caller is responsible to updating the user table so that it no longer
4991 * references the shadow page.
4992 *
4993 * @param pPool The pool.
4994 * @param pPage The shadow page.
4995 * @param iUser The shadow page pool index of the user table.
4996 * NIL_PGMPOOL_IDX for root pages.
4997 * @param iUserTable The index into the user table (shadowed). Ignored if
4998 * root page.
4999 */
5000void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5001{
5002 PVM pVM = pPool->CTX_SUFF(pVM);
5003
5004 STAM_PROFILE_START(&pPool->StatFree, a);
5005 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5006 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5007 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5008
5009 pgmLock(pVM);
5010 if (iUser != NIL_PGMPOOL_IDX)
5011 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5012 if (!pPage->fCached)
5013 pgmPoolFlushPage(pPool, pPage);
5014 pgmUnlock(pVM);
5015 STAM_PROFILE_STOP(&pPool->StatFree, a);
5016}
5017
5018
5019/**
5020 * Makes one or more free page free.
5021 *
5022 * @returns VBox status code.
5023 * @retval VINF_SUCCESS on success.
5024 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5025 *
5026 * @param pPool The pool.
5027 * @param enmKind Page table kind
5028 * @param iUser The user of the page.
5029 */
5030static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5031{
5032 PVM pVM = pPool->CTX_SUFF(pVM);
5033 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5034 NOREF(enmKind);
5035
5036 /*
5037 * If the pool isn't full grown yet, expand it.
5038 */
5039 if ( pPool->cCurPages < pPool->cMaxPages
5040#if defined(IN_RC)
5041 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
5042 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5043 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
5044#endif
5045 )
5046 {
5047 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5048#ifdef IN_RING3
5049 int rc = PGMR3PoolGrow(pVM);
5050#else
5051 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
5052#endif
5053 if (RT_FAILURE(rc))
5054 return rc;
5055 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5056 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5057 return VINF_SUCCESS;
5058 }
5059
5060 /*
5061 * Free one cached page.
5062 */
5063 return pgmPoolCacheFreeOne(pPool, iUser);
5064}
5065
5066
5067/**
5068 * Allocates a page from the pool.
5069 *
5070 * This page may actually be a cached page and not in need of any processing
5071 * on the callers part.
5072 *
5073 * @returns VBox status code.
5074 * @retval VINF_SUCCESS if a NEW page was allocated.
5075 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5076 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5077 *
5078 * @param pVM The cross context VM structure.
5079 * @param GCPhys The GC physical address of the page we're gonna shadow.
5080 * For 4MB and 2MB PD entries, it's the first address the
5081 * shadow PT is covering.
5082 * @param enmKind The kind of mapping.
5083 * @param enmAccess Access type for the mapping (only relevant for big pages)
5084 * @param fA20Enabled Whether the A20 gate is enabled or not.
5085 * @param iUser The shadow page pool index of the user table. Root
5086 * pages should pass NIL_PGMPOOL_IDX.
5087 * @param iUserTable The index into the user table (shadowed). Ignored for
5088 * root pages (iUser == NIL_PGMPOOL_IDX).
5089 * @param fLockPage Lock the page
5090 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5091 */
5092int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5093 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5094{
5095 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5096 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5097 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5098 *ppPage = NULL;
5099 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5100 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5101 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5102
5103 pgmLock(pVM);
5104
5105 if (pPool->fCacheEnabled)
5106 {
5107 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5108 if (RT_SUCCESS(rc2))
5109 {
5110 if (fLockPage)
5111 pgmPoolLockPage(pPool, *ppPage);
5112 pgmUnlock(pVM);
5113 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5114 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5115 return rc2;
5116 }
5117 }
5118
5119 /*
5120 * Allocate a new one.
5121 */
5122 int rc = VINF_SUCCESS;
5123 uint16_t iNew = pPool->iFreeHead;
5124 if (iNew == NIL_PGMPOOL_IDX)
5125 {
5126 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5127 if (RT_FAILURE(rc))
5128 {
5129 pgmUnlock(pVM);
5130 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5131 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5132 return rc;
5133 }
5134 iNew = pPool->iFreeHead;
5135 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5136 }
5137
5138 /* unlink the free head */
5139 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5140 pPool->iFreeHead = pPage->iNext;
5141 pPage->iNext = NIL_PGMPOOL_IDX;
5142
5143 /*
5144 * Initialize it.
5145 */
5146 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5147 pPage->enmKind = enmKind;
5148 pPage->enmAccess = enmAccess;
5149 pPage->GCPhys = GCPhys;
5150 pPage->fA20Enabled = fA20Enabled;
5151 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5152 pPage->fMonitored = false;
5153 pPage->fCached = false;
5154 pPage->fDirty = false;
5155 pPage->fReusedFlushPending = false;
5156 pPage->cModifications = 0;
5157 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5158 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5159 pPage->cPresent = 0;
5160 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5161 pPage->idxDirtyEntry = 0;
5162 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5163 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5164 pPage->cLastAccessHandler = 0;
5165 pPage->cLocked = 0;
5166# ifdef VBOX_STRICT
5167 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5168# endif
5169
5170 /*
5171 * Insert into the tracking and cache. If this fails, free the page.
5172 */
5173 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5174 if (RT_FAILURE(rc3))
5175 {
5176 pPool->cUsedPages--;
5177 pPage->enmKind = PGMPOOLKIND_FREE;
5178 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5179 pPage->GCPhys = NIL_RTGCPHYS;
5180 pPage->iNext = pPool->iFreeHead;
5181 pPool->iFreeHead = pPage->idx;
5182 pgmUnlock(pVM);
5183 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5184 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5185 return rc3;
5186 }
5187
5188 /*
5189 * Commit the allocation, clear the page and return.
5190 */
5191#ifdef VBOX_WITH_STATISTICS
5192 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5193 pPool->cUsedPagesHigh = pPool->cUsedPages;
5194#endif
5195
5196 if (!pPage->fZeroed)
5197 {
5198 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5199 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5200 ASMMemZeroPage(pv);
5201 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5202 }
5203
5204 *ppPage = pPage;
5205 if (fLockPage)
5206 pgmPoolLockPage(pPool, pPage);
5207 pgmUnlock(pVM);
5208 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5209 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5210 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5211 return rc;
5212}
5213
5214
5215/**
5216 * Frees a usage of a pool page.
5217 *
5218 * @param pVM The cross context VM structure.
5219 * @param HCPhys The HC physical address of the shadow page.
5220 * @param iUser The shadow page pool index of the user table.
5221 * NIL_PGMPOOL_IDX if root page.
5222 * @param iUserTable The index into the user table (shadowed). Ignored if
5223 * root page.
5224 */
5225void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5226{
5227 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5228 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5229 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5230}
5231
5232
5233/**
5234 * Internal worker for finding a 'in-use' shadow page give by it's physical address.
5235 *
5236 * @returns Pointer to the shadow page structure.
5237 * @param pPool The pool.
5238 * @param HCPhys The HC physical address of the shadow page.
5239 */
5240PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5241{
5242 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5243
5244 /*
5245 * Look up the page.
5246 */
5247 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5248
5249 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5250 return pPage;
5251}
5252
5253
5254/**
5255 * Internal worker for finding a page for debugging purposes, no assertions.
5256 *
5257 * @returns Pointer to the shadow page structure. NULL on if not found.
5258 * @param pPool The pool.
5259 * @param HCPhys The HC physical address of the shadow page.
5260 */
5261PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5262{
5263 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5264 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5265}
5266
5267#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5268
5269/**
5270 * Flush the specified page if present
5271 *
5272 * @param pVM The cross context VM structure.
5273 * @param GCPhys Guest physical address of the page to flush
5274 */
5275void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5276{
5277 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5278
5279 VM_ASSERT_EMT(pVM);
5280
5281 /*
5282 * Look up the GCPhys in the hash.
5283 */
5284 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5285 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5286 if (i == NIL_PGMPOOL_IDX)
5287 return;
5288
5289 do
5290 {
5291 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5292 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5293 {
5294 switch (pPage->enmKind)
5295 {
5296 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5297 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5298 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5299 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5300 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5301 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5302 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5303 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5304 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5305 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5306 case PGMPOOLKIND_64BIT_PML4:
5307 case PGMPOOLKIND_32BIT_PD:
5308 case PGMPOOLKIND_PAE_PDPT:
5309 {
5310 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5311#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5312 if (pPage->fDirty)
5313 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5314 else
5315#endif
5316 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5317 Assert(!pgmPoolIsPageLocked(pPage));
5318 pgmPoolMonitorChainFlush(pPool, pPage);
5319 return;
5320 }
5321
5322 /* ignore, no monitoring. */
5323 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5324 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5325 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5326 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5327 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5328 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5329 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5330 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5331 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5332 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5333 case PGMPOOLKIND_ROOT_NESTED:
5334 case PGMPOOLKIND_PAE_PD_PHYS:
5335 case PGMPOOLKIND_PAE_PDPT_PHYS:
5336 case PGMPOOLKIND_32BIT_PD_PHYS:
5337 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5338 break;
5339
5340 default:
5341 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5342 }
5343 }
5344
5345 /* next */
5346 i = pPage->iNext;
5347 } while (i != NIL_PGMPOOL_IDX);
5348 return;
5349}
5350
5351#endif /* IN_RING3 */
5352#ifdef IN_RING3
5353
5354/**
5355 * Reset CPU on hot plugging.
5356 *
5357 * @param pVM The cross context VM structure.
5358 * @param pVCpu The cross context virtual CPU structure.
5359 */
5360void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5361{
5362 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5363
5364 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5365 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5366 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5367}
5368
5369
5370/**
5371 * Flushes the entire cache.
5372 *
5373 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5374 * this and execute this CR3 flush.
5375 *
5376 * @param pVM The cross context VM structure.
5377 */
5378void pgmR3PoolReset(PVM pVM)
5379{
5380 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5381
5382 PGM_LOCK_ASSERT_OWNER(pVM);
5383 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5384 LogFlow(("pgmR3PoolReset:\n"));
5385
5386 /*
5387 * If there are no pages in the pool, there is nothing to do.
5388 */
5389 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5390 {
5391 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5392 return;
5393 }
5394
5395 /*
5396 * Exit the shadow mode since we're going to clear everything,
5397 * including the root page.
5398 */
5399 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5400 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5401
5402 /*
5403 * Nuke the free list and reinsert all pages into it.
5404 */
5405 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5406 {
5407 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5408
5409 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5410 if (pPage->fMonitored)
5411 pgmPoolMonitorFlush(pPool, pPage);
5412 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5413 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5414 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5415 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5416 pPage->cModifications = 0;
5417 pPage->GCPhys = NIL_RTGCPHYS;
5418 pPage->enmKind = PGMPOOLKIND_FREE;
5419 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5420 Assert(pPage->idx == i);
5421 pPage->iNext = i + 1;
5422 pPage->fA20Enabled = true;
5423 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5424 pPage->fSeenNonGlobal = false;
5425 pPage->fMonitored = false;
5426 pPage->fDirty = false;
5427 pPage->fCached = false;
5428 pPage->fReusedFlushPending = false;
5429 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5430 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5431 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5432 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5433 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5434 pPage->cLastAccessHandler = 0;
5435 pPage->cLocked = 0;
5436#ifdef VBOX_STRICT
5437 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5438#endif
5439 }
5440 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5441 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5442 pPool->cUsedPages = 0;
5443
5444 /*
5445 * Zap and reinitialize the user records.
5446 */
5447 pPool->cPresent = 0;
5448 pPool->iUserFreeHead = 0;
5449 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5450 const unsigned cMaxUsers = pPool->cMaxUsers;
5451 for (unsigned i = 0; i < cMaxUsers; i++)
5452 {
5453 paUsers[i].iNext = i + 1;
5454 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5455 paUsers[i].iUserTable = 0xfffffffe;
5456 }
5457 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5458
5459 /*
5460 * Clear all the GCPhys links and rebuild the phys ext free list.
5461 */
5462 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5463 pRam;
5464 pRam = pRam->CTX_SUFF(pNext))
5465 {
5466 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5467 while (iPage-- > 0)
5468 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5469 }
5470
5471 pPool->iPhysExtFreeHead = 0;
5472 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5473 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5474 for (unsigned i = 0; i < cMaxPhysExts; i++)
5475 {
5476 paPhysExts[i].iNext = i + 1;
5477 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5478 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5479 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5480 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5481 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5482 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5483 }
5484 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5485
5486 /*
5487 * Just zap the modified list.
5488 */
5489 pPool->cModifiedPages = 0;
5490 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5491
5492 /*
5493 * Clear the GCPhys hash and the age list.
5494 */
5495 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5496 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5497 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5498 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5499
5500#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5501 /* Clear all dirty pages. */
5502 pPool->idxFreeDirtyPage = 0;
5503 pPool->cDirtyPages = 0;
5504 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5505 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5506#endif
5507
5508 /*
5509 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5510 */
5511 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5512 {
5513 /*
5514 * Re-enter the shadowing mode and assert Sync CR3 FF.
5515 */
5516 PVMCPU pVCpu = &pVM->aCpus[i];
5517 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5518 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5519 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5520 }
5521
5522 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5523}
5524
5525#endif /* IN_RING3 */
5526
5527#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5528/**
5529 * Stringifies a PGMPOOLKIND value.
5530 */
5531static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5532{
5533 switch ((PGMPOOLKIND)enmKind)
5534 {
5535 case PGMPOOLKIND_INVALID:
5536 return "PGMPOOLKIND_INVALID";
5537 case PGMPOOLKIND_FREE:
5538 return "PGMPOOLKIND_FREE";
5539 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5540 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5541 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5542 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5543 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5544 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5545 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5546 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5547 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5548 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5549 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5550 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5551 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5552 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5553 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5554 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5555 case PGMPOOLKIND_32BIT_PD:
5556 return "PGMPOOLKIND_32BIT_PD";
5557 case PGMPOOLKIND_32BIT_PD_PHYS:
5558 return "PGMPOOLKIND_32BIT_PD_PHYS";
5559 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5560 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5561 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5562 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5563 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5564 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5565 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5566 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5567 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5568 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5569 case PGMPOOLKIND_PAE_PD_PHYS:
5570 return "PGMPOOLKIND_PAE_PD_PHYS";
5571 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5572 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5573 case PGMPOOLKIND_PAE_PDPT:
5574 return "PGMPOOLKIND_PAE_PDPT";
5575 case PGMPOOLKIND_PAE_PDPT_PHYS:
5576 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5577 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5578 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5579 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5580 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5581 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5582 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5583 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5584 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5585 case PGMPOOLKIND_64BIT_PML4:
5586 return "PGMPOOLKIND_64BIT_PML4";
5587 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5588 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5589 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5590 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5591 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5592 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5593 case PGMPOOLKIND_ROOT_NESTED:
5594 return "PGMPOOLKIND_ROOT_NESTED";
5595 }
5596 return "Unknown kind!";
5597}
5598#endif /* LOG_ENABLED || VBOX_STRICT */
5599
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette