VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 55910

Last change on this file since 55910 was 55910, checked in by vboxsync, 10 years ago

pgmPoolAccessHandler: Make use of the enmOrigin to detect device writes and increase the write size to 16, doing proper splitting in the manner of pgmPoolAccessPfHandlerSimple.
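
The "proper splitting" referred to here is the pattern visible in pgmPoolAccessPfHandlerSimple and pgmPoolAccessHandler further down: a write larger than 8 bytes (up to 16) is reported to the monitor as an 8-byte chunk followed by the remainder. A minimal standalone sketch of that pattern, with a hypothetical notify() callback standing in for pgmPoolMonitorChainChanging:

#include <stddef.h>
#include <stdint.h>

/* Sketch: split a monitored write of at most 16 bytes the way the pool handlers do. */
static void notifySplitWrite(uint64_t GCPhys, const uint8_t *pvBuf, size_t cb,
                             void (*notify)(uint64_t GCPhys, const uint8_t *pv, size_t cb))
{
    if (cb <= 8)
        notify(GCPhys, pvBuf, cb);
    else /* cb is assumed to be <= 16 here */
    {
        notify(GCPhys,     pvBuf,     8);
        notify(GCPhys + 8, pvBuf + 8, cb - 8);
    }
}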

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 213.8 KB
1/* $Id: PGMAllPool.cpp 55910 2015-05-18 13:36:19Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hm_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
54static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
55#endif
56#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
57static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
58#endif
59
60int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
61PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
62void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
63void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
64
65RT_C_DECLS_END
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
88/**
89 * Flushes a chain of pages sharing the same access monitor.
90 *
91 * @returns VBox status code suitable for scheduling.
92 * @param pPool The pool.
93 * @param pPage A page in the chain.
94 * @todo VBOXSTRICTRC
95 */
96int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
97{
98 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
99
100 /*
101 * Find the list head.
102 */
103 uint16_t idx = pPage->idx;
104 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
105 {
106 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 idx = pPage->iMonitoredPrev;
109 Assert(idx != pPage->idx);
110 pPage = &pPool->aPages[idx];
111 }
112 }
113
114 /*
115 * Iterate the list flushing each shadow page.
116 */
117 int rc = VINF_SUCCESS;
118 for (;;)
119 {
120 idx = pPage->iMonitoredNext;
121 Assert(idx != pPage->idx);
122 if (pPage->idx >= PGMPOOL_IDX_FIRST)
123 {
124 int rc2 = pgmPoolFlushPage(pPool, pPage);
125 AssertRC(rc2);
126 }
127 /* next */
128 if (idx == NIL_PGMPOOL_IDX)
129 break;
130 pPage = &pPool->aPages[idx];
131 }
132 return rc;
133}
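
The list handling in pgmPoolMonitorChainFlush above is the usual rewind-to-head-then-walk pattern over index-linked pool pages, fetching the next index before flushing because the flush unlinks the page. A generic sketch with a hypothetical node type (not the PGMPOOLPAGE layout):

#include <stdint.h>

#define NIL_IDX UINT16_MAX

typedef struct CHAINNODE { uint16_t iPrev, iNext; } CHAINNODE;

/* Sketch: rewind to the head of an index-linked chain, then visit every member,
   grabbing the next index before the visit callback can unlink the node. */
static void visitChain(CHAINNODE *paNodes, uint16_t idx, void (*pfnVisit)(CHAINNODE *))
{
    while (paNodes[idx].iPrev != NIL_IDX)
        idx = paNodes[idx].iPrev;
    for (;;)
    {
        uint16_t iNext = paNodes[idx].iNext;
        pfnVisit(&paNodes[idx]);
        if (iNext == NIL_IDX)
            break;
        idx = iNext;
    }
}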
134
135
136/**
137 * Wrapper for reading the guest entry being modified, using the context-appropriate access method.
138 *
139 * @returns VBox status code suitable for scheduling.
140 * @param pVM Pointer to the VM.
141 * @param pvDst Destination address
142 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
143 * on the context (e.g. \#PF in R0 & RC).
144 * @param GCPhysSrc The source guest physical address.
145 * @param cb Size of data to read
146 */
147DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
148{
149#if defined(IN_RING3)
150 NOREF(pVM); NOREF(GCPhysSrc);
151 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
152 return VINF_SUCCESS;
153#else
154 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
155 NOREF(pvSrc);
156 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
157#endif
158}
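
The '& ~(cb - 1)' masking in both branches above aligns the read down to the entry boundary, so a fault in the middle of a PTE/PDE still fetches the whole entry. A small illustration of the arithmetic with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t GCPhysFault = 0x00321a34; /* made-up fault address inside a PAE PTE */
    size_t   cb          = 8;          /* sizeof(X86PTEPAE) */
    uint64_t GCPhysEntry = GCPhysFault & ~(uint64_t)(cb - 1);
    printf("entry starts at %#llx\n", (unsigned long long)GCPhysEntry); /* 0x321a30 */
    return 0;
}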
159
160
161/**
162 * Process shadow entries before they are changed by the guest.
163 *
164 * For PT entries we will clear them. For PD entries, we'll simply check
165 * for mapping conflicts and set the SyncCR3 FF if found.
166 *
167 * @param pVCpu Pointer to the VMCPU.
168 * @param pPool The pool.
169 * @param pPage The head page.
170 * @param GCPhysFault The guest physical fault address.
171 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
172 * depending on the context (e.g. \#PF in R0 & RC).
173 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
174 */
175static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
176 void const *pvAddress, unsigned cbWrite)
177{
178 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
179 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
180 PVM pVM = pPool->CTX_SUFF(pVM);
181 NOREF(pVCpu);
182
183 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
184
185 for (;;)
186 {
187 union
188 {
189 void *pv;
190 PX86PT pPT;
191 PPGMSHWPTPAE pPTPae;
192 PX86PD pPD;
193 PX86PDPAE pPDPae;
194 PX86PDPT pPDPT;
195 PX86PML4 pPML4;
196 } uShw;
197
198 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
199
200 uShw.pv = NULL;
201 switch (pPage->enmKind)
202 {
203 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
204 {
205 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
206 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
207 const unsigned iShw = off / sizeof(X86PTE);
208 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
209 if (uShw.pPT->a[iShw].n.u1Present)
210 {
211 X86PTE GstPte;
212
213 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
214 AssertRC(rc);
215 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
216 pgmPoolTracDerefGCPhysHint(pPool, pPage,
217 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
218 GstPte.u & X86_PTE_PG_MASK,
219 iShw);
220 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
221 }
222 break;
223 }
224
225 /* page/2 sized */
226 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
227 {
228 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
229 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
230 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
231 {
232 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
233 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
234 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
235 {
236 X86PTE GstPte;
237 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
238 AssertRC(rc);
239
240 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
241 pgmPoolTracDerefGCPhysHint(pPool, pPage,
242 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
243 GstPte.u & X86_PTE_PG_MASK,
244 iShw);
245 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
246 }
247 }
248 break;
249 }
250
251 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
252 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
253 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
254 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
255 {
256 unsigned iGst = off / sizeof(X86PDE);
257 unsigned iShwPdpt = iGst / 256;
258 unsigned iShw = (iGst % 256) * 2;
259 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
260
261 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
262 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
263 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
264 {
265 for (unsigned i = 0; i < 2; i++)
266 {
267# ifdef VBOX_WITH_RAW_MODE_NOT_R0
268 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
269 {
270 Assert(pgmMapAreMappingsEnabled(pVM));
271 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
272 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
273 break;
274 }
275# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
276 if (uShw.pPDPae->a[iShw+i].n.u1Present)
277 {
278 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
279 pgmPoolFree(pVM,
280 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
281 pPage->idx,
282 iShw + i);
283 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
284 }
285
286 /* paranoia / a bit assumptive. */
287 if ( (off & 3)
288 && (off & 3) + cbWrite > 4)
289 {
290 const unsigned iShw2 = iShw + 2 + i;
291 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
292 {
293# ifdef VBOX_WITH_RAW_MODE_NOT_R0
294 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
295 {
296 Assert(pgmMapAreMappingsEnabled(pVM));
297 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
298 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
299 break;
300 }
301# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
302 if (uShw.pPDPae->a[iShw2].n.u1Present)
303 {
304 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
305 pgmPoolFree(pVM,
306 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
307 pPage->idx,
308 iShw2);
309 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
310 }
311 }
312 }
313 }
314 }
315 break;
316 }
317
318 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
319 {
320 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
321 const unsigned iShw = off / sizeof(X86PTEPAE);
322 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
323 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
324 {
325 X86PTEPAE GstPte;
326 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
327 AssertRC(rc);
328
329 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
330 pgmPoolTracDerefGCPhysHint(pPool, pPage,
331 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
332 GstPte.u & X86_PTE_PAE_PG_MASK,
333 iShw);
334 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
335 }
336
337 /* paranoia / a bit assumptive. */
338 if ( (off & 7)
339 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
340 {
341 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
342 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
343
344 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
345 {
346 X86PTEPAE GstPte;
347 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
348 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
349 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
350 AssertRC(rc);
351 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
352 pgmPoolTracDerefGCPhysHint(pPool, pPage,
353 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
354 GstPte.u & X86_PTE_PAE_PG_MASK,
355 iShw2);
356 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
357 }
358 }
359 break;
360 }
361
362 case PGMPOOLKIND_32BIT_PD:
363 {
364 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
365 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
366
367 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
368 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
369# ifdef VBOX_WITH_RAW_MODE_NOT_R0
370 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
371 {
372 Assert(pgmMapAreMappingsEnabled(pVM));
373 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
374 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
375 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
376 break;
377 }
378 else
379# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
380 {
381 if (uShw.pPD->a[iShw].n.u1Present)
382 {
383 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
384 pgmPoolFree(pVM,
385 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
386 pPage->idx,
387 iShw);
388 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
389 }
390 }
391 /* paranoia / a bit assumptive. */
392 if ( (off & 3)
393 && (off & 3) + cbWrite > sizeof(X86PTE))
394 {
395 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
396 if ( iShw2 != iShw
397 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
398 {
399# ifdef VBOX_WITH_RAW_MODE_NOT_R0
400 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
401 {
402 Assert(pgmMapAreMappingsEnabled(pVM));
403 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
404 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
405 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
406 break;
407 }
408# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
409 if (uShw.pPD->a[iShw2].n.u1Present)
410 {
411 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
412 pgmPoolFree(pVM,
413 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
414 pPage->idx,
415 iShw2);
416 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
417 }
418 }
419 }
420#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
421 if ( uShw.pPD->a[iShw].n.u1Present
422 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
423 {
424 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
425# ifdef IN_RC /* TLB load - we're pushing things a bit... */
426 ASMProbeReadByte(pvAddress);
427# endif
428 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
429 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
430 }
431#endif
432 break;
433 }
434
435 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
436 {
437 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
438 const unsigned iShw = off / sizeof(X86PDEPAE);
439 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
440#ifdef VBOX_WITH_RAW_MODE_NOT_R0
441 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
442 {
443 Assert(pgmMapAreMappingsEnabled(pVM));
444 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
445 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
446 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
447 break;
448 }
449#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
450 /*
451 * Causes trouble when the guest uses a PDE to refer to the whole page table level
452 * structure. (Invalidate here; faults later on when it tries to change the page
453 * table entries -> recheck; probably only applies to the RC case.)
454 */
455#ifdef VBOX_WITH_RAW_MODE_NOT_R0
456 else
457#endif
458 {
459 if (uShw.pPDPae->a[iShw].n.u1Present)
460 {
461 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
462 pgmPoolFree(pVM,
463 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
464 pPage->idx,
465 iShw);
466 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
467 }
468 }
469 /* paranoia / a bit assumptive. */
470 if ( (off & 7)
471 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
472 {
473 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
474 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
475
476#ifdef VBOX_WITH_RAW_MODE_NOT_R0
477 if ( iShw2 != iShw
478 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
479 {
480 Assert(pgmMapAreMappingsEnabled(pVM));
481 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
482 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
483 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
484 break;
485 }
486 else
487#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
488 if (uShw.pPDPae->a[iShw2].n.u1Present)
489 {
490 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
491 pgmPoolFree(pVM,
492 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
493 pPage->idx,
494 iShw2);
495 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
496 }
497 }
498 break;
499 }
500
501 case PGMPOOLKIND_PAE_PDPT:
502 {
503 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
504 /*
505 * Hopefully this doesn't happen very often:
506 * - touching unused parts of the page
507 * - messing with the bits of pd pointers without changing the physical address
508 */
509 /* PDPT roots are not page aligned; 32 byte only! */
510 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
511
512 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
513 const unsigned iShw = offPdpt / sizeof(X86PDPE);
514 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
515 {
516# ifdef VBOX_WITH_RAW_MODE_NOT_R0
517 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
518 {
519 Assert(pgmMapAreMappingsEnabled(pVM));
520 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
521 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
522 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
523 break;
524 }
525 else
526# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
527 if (uShw.pPDPT->a[iShw].n.u1Present)
528 {
529 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
530 pgmPoolFree(pVM,
531 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
532 pPage->idx,
533 iShw);
534 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
535 }
536
537 /* paranoia / a bit assumptive. */
538 if ( (offPdpt & 7)
539 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
540 {
541 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
542 if ( iShw2 != iShw
543 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
544 {
545# ifdef VBOX_WITH_RAW_MODE_NOT_R0
546 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
547 {
548 Assert(pgmMapAreMappingsEnabled(pVM));
549 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
550 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
551 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
552 break;
553 }
554 else
555# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
556 if (uShw.pPDPT->a[iShw2].n.u1Present)
557 {
558 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
559 pgmPoolFree(pVM,
560 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
561 pPage->idx,
562 iShw2);
563 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
564 }
565 }
566 }
567 }
568 break;
569 }
570
571#ifndef IN_RC
572 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
573 {
574 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
575 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
576 const unsigned iShw = off / sizeof(X86PDEPAE);
577 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
578 if (uShw.pPDPae->a[iShw].n.u1Present)
579 {
580 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
581 pgmPoolFree(pVM,
582 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
583 pPage->idx,
584 iShw);
585 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
586 }
587 /* paranoia / a bit assumptive. */
588 if ( (off & 7)
589 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
590 {
591 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
592 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
593
594 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
595 if (uShw.pPDPae->a[iShw2].n.u1Present)
596 {
597 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
598 pgmPoolFree(pVM,
599 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
600 pPage->idx,
601 iShw2);
602 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
603 }
604 }
605 break;
606 }
607
608 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
609 {
610 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
611 /*
612 * Hopefully this doesn't happen very often:
613 * - messing with the bits of pd pointers without changing the physical address
614 */
615 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
616 const unsigned iShw = off / sizeof(X86PDPE);
617 if (uShw.pPDPT->a[iShw].n.u1Present)
618 {
619 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
620 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
621 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
622 }
623 /* paranoia / a bit assumptive. */
624 if ( (off & 7)
625 && (off & 7) + cbWrite > sizeof(X86PDPE))
626 {
627 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
628 if (uShw.pPDPT->a[iShw2].n.u1Present)
629 {
630 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
631 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
632 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
633 }
634 }
635 break;
636 }
637
638 case PGMPOOLKIND_64BIT_PML4:
639 {
640 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
641 /*
642 * Hopefully this doesn't happen very often:
643 * - messing with the bits of pd pointers without changing the physical address
644 */
645 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
646 const unsigned iShw = off / sizeof(X86PDPE);
647 if (uShw.pPML4->a[iShw].n.u1Present)
648 {
649 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
650 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
651 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
652 }
653 /* paranoia / a bit assumptive. */
654 if ( (off & 7)
655 && (off & 7) + cbWrite > sizeof(X86PDPE))
656 {
657 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
658 if (uShw.pPML4->a[iShw2].n.u1Present)
659 {
660 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
661 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
662 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
663 }
664 }
665 break;
666 }
667#endif /* !IN_RC */
668
669 default:
670 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
671 }
672 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
673
674 /* next */
675 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
676 return;
677 pPage = &pPool->aPages[pPage->iMonitoredNext];
678 }
679}
680
681# ifndef IN_RING3
682
683/**
684 * Checks if an access could be a fork operation in progress.
685 *
686 * Meaning that the guest is setting up the parent process for copy-on-write.
687 *
688 * @returns true if it's likely that we're forking, otherwise false.
689 * @param pPool The pool.
690 * @param pDis The disassembled instruction.
691 * @param offFault The access offset.
692 */
693DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
694{
695 /*
696 * i386 linux is using btr to clear X86_PTE_RW.
697 * The functions involved are (2.6.16 source inspection):
698 * clear_bit
699 * ptep_set_wrprotect
700 * copy_one_pte
701 * copy_pte_range
702 * copy_pmd_range
703 * copy_pud_range
704 * copy_page_range
705 * dup_mmap
706 * dup_mm
707 * copy_mm
708 * copy_process
709 * do_fork
710 */
711 if ( pDis->pCurInstr->uOpcode == OP_BTR
712 && !(offFault & 4)
713 /** @todo Validate that the bit index is X86_PTE_RW. */
714 )
715 {
716 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
717 return true;
718 }
719 return false;
720}
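
For reference, the guest-side pattern this heuristic is looking for: clearing the R/W bit (bit 1) of the entry with btr, which also explains the !(offFault & 4) requirement, i.e. the write must hit the low half of a PAE entry. A generic sketch (x86 GCC/Clang inline assembly, not code taken from Linux or VirtualBox):

#include <stdint.h>

/* Sketch: what ptep_set_wrprotect boils down to - a locked "btr" on bit 1
   (X86_PTE_RW) of the low dword of the page table entry. */
static inline void clearPteWriteBit(volatile uint32_t *pu32PteLow)
{
    __asm__ __volatile__("lock btrl %1, %0" : "+m" (*pu32PteLow) : "Ir" (1) : "cc");
}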
721
722
723/**
724 * Determine whether the page is likely to have been reused.
725 *
726 * @returns true if we consider the page as being reused for a different purpose.
727 * @returns false if we consider it to still be a paging page.
728 * @param pVM Pointer to the VM.
729 * @param pVCpu Pointer to the VMCPU.
730 * @param pRegFrame Trap register frame.
731 * @param pDis The disassembly info for the faulting instruction.
732 * @param pvFault The fault address.
733 *
734 * @remark The REP prefix check is left to the caller because of STOSD/W.
735 */
736DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
737{
738#ifndef IN_RC
739 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
740 if ( HMHasPendingIrq(pVM)
741 && (pRegFrame->rsp - pvFault) < 32)
742 {
743 /* Fault caused by stack writes while trying to inject an interrupt event. */
744 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
745 return true;
746 }
747#else
748 NOREF(pVM); NOREF(pvFault);
749#endif
750
751 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
752
753 /* Non-supervisor mode write means it's used for something else. */
754 if (CPUMGetGuestCPL(pVCpu) == 3)
755 return true;
756
757 switch (pDis->pCurInstr->uOpcode)
758 {
759 /* call implies the actual push of the return address faulted */
760 case OP_CALL:
761 Log4(("pgmPoolMonitorIsReused: CALL\n"));
762 return true;
763 case OP_PUSH:
764 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
765 return true;
766 case OP_PUSHF:
767 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
768 return true;
769 case OP_PUSHA:
770 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
771 return true;
772 case OP_FXSAVE:
773 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
774 return true;
775 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
776 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
777 return true;
778 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
779 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
780 return true;
781 case OP_MOVSWD:
782 case OP_STOSWD:
783 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
784 && pRegFrame->rcx >= 0x40
785 )
786 {
787 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
788
789 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
790 return true;
791 }
792 return false;
793 }
794 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
795 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
796 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
797 {
798 Log4(("pgmPoolMonitorIsReused: ESP\n"));
799 return true;
800 }
801
802 return false;
803}
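
The stack-proximity test at the top of the function relies on unsigned wrap-around: only faults in the 32 bytes at or below the stack pointer pass. Restated in isolation (plain integer types instead of the VBox register frame):

#include <stdbool.h>
#include <stdint.h>

/* Sketch: true when the faulting address lies within [rsp - 31, rsp].
   If pvFault is above rsp, the unsigned subtraction wraps and the test fails. */
static bool isNearStackTop(uint64_t rsp, uint64_t pvFault)
{
    return rsp - pvFault < 32;
}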
804
805
806/**
807 * Flushes the page being accessed.
808 *
809 * @returns VBox status code suitable for scheduling.
810 * @param pVM Pointer to the VM.
811 * @param pVCpu Pointer to the VMCPU.
812 * @param pPool The pool.
813 * @param pPage The pool page (head).
814 * @param pDis The disassembly of the write instruction.
815 * @param pRegFrame The trap register frame.
816 * @param GCPhysFault The fault address as guest physical address.
817 * @param pvFault The fault address.
818 * @todo VBOXSTRICTRC
819 */
820static int pgmPoolAccessPfHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
821 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
822{
823 NOREF(pVM); NOREF(GCPhysFault);
824
825 /*
826 * First, do the flushing.
827 */
828 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
829
830 /*
831 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
832 * Must do this in raw mode (!); XP boot will fail otherwise.
833 */
834 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
835 if (rc2 == VINF_SUCCESS)
836 { /* do nothing */ }
837#ifdef VBOX_WITH_IEM
838 else if (rc2 == VINF_EM_RESCHEDULE)
839 {
840 if (rc == VINF_SUCCESS)
841 rc = VBOXSTRICTRC_VAL(rc2);
842# ifndef IN_RING3
843 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
844# endif
845 }
846#endif
847 else if (rc2 == VERR_EM_INTERPRETER)
848 {
849#ifdef IN_RC
850 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
851 {
852 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
853 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->eip));
854 rc = VINF_SUCCESS;
855 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
856 }
857 else
858#endif
859 {
860 rc = VINF_EM_RAW_EMULATE_INSTR;
861 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
862 }
863 }
864 else if (RT_FAILURE_NP(rc2))
865 rc = VBOXSTRICTRC_VAL(rc2);
866 else
867 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
868
869 LogFlow(("pgmPoolAccessPfHandlerPT: returns %Rrc (flushed)\n", rc));
870 return rc;
871}
872
873
874/**
875 * Handles the STOSD write accesses.
876 *
877 * @returns VBox status code suitable for scheduling.
878 * @param pVM Pointer to the VM.
879 * @param pPool The pool.
880 * @param pPage The pool page (head).
881 * @param pDis The disassembly of the write instruction.
882 * @param pRegFrame The trap register frame.
883 * @param GCPhysFault The fault address as guest physical address.
884 * @param pvFault The fault address.
885 */
886DECLINLINE(int) pgmPoolAccessPfHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
887 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
888{
889 unsigned uIncrement = pDis->Param1.cb;
890 NOREF(pVM);
891
892 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
893 Assert(pRegFrame->rcx <= 0x20);
894
895#ifdef VBOX_STRICT
896 if (pDis->uOpMode == DISCPUMODE_32BIT)
897 Assert(uIncrement == 4);
898 else
899 Assert(uIncrement == 8);
900#endif
901
902 Log3(("pgmPoolAccessPfHandlerSTOSD\n"));
903
904 /*
905 * Increment the modification counter and insert it into the list
906 * of modified pages the first time.
907 */
908 if (!pPage->cModifications++)
909 pgmPoolMonitorModifiedInsert(pPool, pPage);
910
911 /*
912 * Execute REP STOSD.
913 *
914 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
915 * write situation, meaning that it's safe to write here.
916 */
917 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
918 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
919 while (pRegFrame->rcx)
920 {
921#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
922 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
923 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
924 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
925#else
926 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
927#endif
928#ifdef IN_RC
929 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
930#else
931 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
932#endif
933 pu32 += uIncrement;
934 GCPhysFault += uIncrement;
935 pRegFrame->rdi += uIncrement;
936 pRegFrame->rcx--;
937 }
938 pRegFrame->rip += pDis->cbInstr;
939
940 LogFlow(("pgmPoolAccessPfHandlerSTOSD: returns\n"));
941 return VINF_SUCCESS;
942}
943
944
945/**
946 * Handles the simple write accesses.
947 *
948 * @returns VBox status code suitable for scheduling.
949 * @param pVM Pointer to the VM.
950 * @param pVCpu Pointer to the VMCPU.
951 * @param pPool The pool.
952 * @param pPage The pool page (head).
953 * @param pDis The disassembly of the write instruction.
954 * @param pRegFrame The trap register frame.
955 * @param GCPhysFault The fault address as guest physical address.
956 * @param pvFault The fault address.
957 * @param pfReused Reused state (in/out)
958 */
959DECLINLINE(int) pgmPoolAccessPfHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
960 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
961{
962 Log3(("pgmPoolAccessPfHandlerSimple\n"));
963 NOREF(pVM);
964 NOREF(pfReused); /* initialized by caller */
965
966 /*
967 * Increment the modification counter and insert it into the list
968 * of modified pages the first time.
969 */
970 if (!pPage->cModifications++)
971 pgmPoolMonitorModifiedInsert(pPool, pPage);
972
973 /*
974 * Clear all the pages. ASSUMES that pvFault is readable.
975 */
976#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
977 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
978#endif
979
980 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
981 if (cbWrite <= 8)
982 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
983 else
984 {
985 Assert(cbWrite <= 16);
986 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
987 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
988 }
989
990#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
991 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
992#endif
993
994 /*
995 * Interpret the instruction.
996 */
997 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
998 if (RT_SUCCESS(rc))
999 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1000 else if (rc == VERR_EM_INTERPRETER)
1001 {
1002 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1003 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1004 rc = VINF_EM_RAW_EMULATE_INSTR;
1005 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1006 }
1007
1008#if 0 /* experimental code */
1009 if (rc == VINF_SUCCESS)
1010 {
1011 switch (pPage->enmKind)
1012 {
1013 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1014 {
1015 X86PTEPAE GstPte;
1016 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1017 AssertRC(rc);
1018
1019 /* Check the new value written by the guest. If present and with a bogus physical address, then
1020 * it's fairly safe to assume the guest is reusing the PT.
1021 */
1022 if (GstPte.n.u1Present)
1023 {
1024 RTHCPHYS HCPhys = -1;
1025 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1026 if (rc != VINF_SUCCESS)
1027 {
1028 *pfReused = true;
1029 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1030 }
1031 }
1032 break;
1033 }
1034 }
1035 }
1036#endif
1037
1038 LogFlow(("pgmPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1039 return VBOXSTRICTRC_VAL(rc);
1040}
1041
1042
1043/**
1044 * \#PF Handler callback for PT write accesses.
1045 *
1046 * @returns VBox status code (appropriate for GC return).
1047 * @param pVM Pointer to the VM.
1048 * @param pVCpu Pointer to the cross context CPU context for the
1049 * calling EMT.
1050 * @param uErrorCode CPU Error code.
1051 * @param pRegFrame Trap register frame.
1052 * NULL on DMA and other non CPU access.
1053 * @param pvFault The fault address (cr2).
1054 * @param GCPhysFault The GC physical address corresponding to pvFault.
1055 * @param pvUser User argument.
1056 */
1057DECLEXPORT(int) pgmPoolAccessPfHandler(PVM pVM, PVMCPU pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault,
1058 RTGCPHYS GCPhysFault, void *pvUser)
1059{
1060 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1061 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1062 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1063 unsigned cMaxModifications;
1064 bool fForcedFlush = false;
1065 NOREF(uErrorCode);
1066
1067 LogFlow(("pgmPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1068
1069 pgmLock(pVM);
1070 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1071 {
1072 /* Pool page changed while we were waiting for the lock; ignore. */
1073 Log(("CPU%d: pgmPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1074 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1075 pgmUnlock(pVM);
1076 return VINF_SUCCESS;
1077 }
1078#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1079 if (pPage->fDirty)
1080 {
1081 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1082 pgmUnlock(pVM);
1083 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1084 }
1085#endif
1086
1087#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1088 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1089 {
1090 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1091 void *pvGst;
1092 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1093 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1094 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1095 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1096 }
1097#endif
1098
1099 /*
1100 * Disassemble the faulting instruction.
1101 */
1102 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1103 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1104 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1105 {
1106 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1107 pgmUnlock(pVM);
1108 return rc;
1109 }
1110
1111 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1112
1113 /*
1114 * We should ALWAYS have the list head as user parameter. This
1115 * is because we use that page to record the changes.
1116 */
1117 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1118
1119#ifdef IN_RING0
1120 /* Maximum nr of modifications depends on the page type. */
1121 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1122 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1123 cMaxModifications = 4;
1124 else
1125 cMaxModifications = 24;
1126#else
1127 cMaxModifications = 48;
1128#endif
1129
1130 /*
1131 * Incremental page table updates should weigh more than random ones.
1132 * (Only applies when started from offset 0)
1133 */
1134 pVCpu->pgm.s.cPoolAccessHandler++;
1135 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1136 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1137 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1138 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1139 {
1140 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1141 Assert(pPage->cModifications < 32000);
1142 pPage->cModifications = pPage->cModifications * 2;
1143 pPage->GCPtrLastAccessHandlerFault = pvFault;
1144 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1145 if (pPage->cModifications >= cMaxModifications)
1146 {
1147 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1148 fForcedFlush = true;
1149 }
1150 }
1151
1152 if (pPage->cModifications >= cMaxModifications)
1153 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1154
1155 /*
1156 * Check if it's worth dealing with.
1157 */
1158 bool fReused = false;
1159 bool fNotReusedNotForking = false;
1160 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1161 || pgmPoolIsPageLocked(pPage)
1162 )
1163 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1164 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1165 {
1166 /*
1167 * Simple instructions, no REP prefix.
1168 */
1169 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1170 {
1171 rc = pgmPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1172 if (fReused)
1173 goto flushPage;
1174
1175 /* A mov instruction to change the first page table entry will be remembered so we can detect
1176 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1177 */
1178 if ( rc == VINF_SUCCESS
1179 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1180 && pDis->pCurInstr->uOpcode == OP_MOV
1181 && (pvFault & PAGE_OFFSET_MASK) == 0)
1182 {
1183 pPage->GCPtrLastAccessHandlerFault = pvFault;
1184 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1185 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1186 /* Make sure we don't kick out a page too quickly. */
1187 if (pPage->cModifications > 8)
1188 pPage->cModifications = 2;
1189 }
1190 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1191 {
1192 /* ignore the 2nd write to this page table entry. */
1193 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1194 }
1195 else
1196 {
1197 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1198 pPage->GCPtrLastAccessHandlerRip = 0;
1199 }
1200
1201 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1202 pgmUnlock(pVM);
1203 return rc;
1204 }
1205
1206 /*
1207 * Windows is frequently doing small memset() operations (netio test 4k+).
1208 * We have to deal with these or we'll kill the cache and performance.
1209 */
1210 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1211 && !pRegFrame->eflags.Bits.u1DF
1212 && pDis->uOpMode == pDis->uCpuMode
1213 && pDis->uAddrMode == pDis->uCpuMode)
1214 {
1215 bool fValidStosd = false;
1216
1217 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1218 && pDis->fPrefix == DISPREFIX_REP
1219 && pRegFrame->ecx <= 0x20
1220 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1221 && !((uintptr_t)pvFault & 3)
1222 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1223 )
1224 {
1225 fValidStosd = true;
1226 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1227 }
1228 else
1229 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1230 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1231 && pRegFrame->rcx <= 0x20
1232 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1233 && !((uintptr_t)pvFault & 7)
1234 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1235 )
1236 {
1237 fValidStosd = true;
1238 }
1239
1240 if (fValidStosd)
1241 {
1242 rc = pgmPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1243 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1244 pgmUnlock(pVM);
1245 return rc;
1246 }
1247 }
1248
1249 /* REP prefix, don't bother. */
1250 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1251 Log4(("pgmPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1252 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1253 fNotReusedNotForking = true;
1254 }
1255
1256#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1257 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1258 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1259 */
1260 if ( pPage->cModifications >= cMaxModifications
1261 && !fForcedFlush
1262 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1263 && ( fNotReusedNotForking
1264 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1265 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1266 )
1267 )
1268 {
1269 Assert(!pgmPoolIsPageLocked(pPage));
1270 Assert(pPage->fDirty == false);
1271
1272 /* Flush any monitored duplicates as we will disable write protection. */
1273 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1274 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1275 {
1276 PPGMPOOLPAGE pPageHead = pPage;
1277
1278 /* Find the monitor head. */
1279 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1280 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1281
1282 while (pPageHead)
1283 {
1284 unsigned idxNext = pPageHead->iMonitoredNext;
1285
1286 if (pPageHead != pPage)
1287 {
1288 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1289 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1290 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1291 AssertRC(rc2);
1292 }
1293
1294 if (idxNext == NIL_PGMPOOL_IDX)
1295 break;
1296
1297 pPageHead = &pPool->aPages[idxNext];
1298 }
1299 }
1300
1301 /* The flushing above might fail for locked pages, so double check. */
1302 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1303 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1304 {
1305 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1306
1307 /* Temporarily allow write access to the page table again. */
1308 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1309 if (rc == VINF_SUCCESS)
1310 {
1311 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1312 AssertMsg(rc == VINF_SUCCESS
1313 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1314 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1315 || rc == VERR_PAGE_NOT_PRESENT,
1316 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1317# ifdef VBOX_STRICT
1318 pPage->GCPtrDirtyFault = pvFault;
1319# endif
1320
1321 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1322 pgmUnlock(pVM);
1323 return rc;
1324 }
1325 }
1326 }
1327#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1328
1329 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1330flushPage:
1331 /*
1332 * Not worth it, so flush it.
1333 *
1334 * If we considered it to be reused, don't go back to ring-3
1335 * to emulate failed instructions since we usually cannot
1336 * interpret them. This may be a bit risky, in which case
1337 * the reuse detection must be fixed.
1338 */
1339 rc = pgmPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1340 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1341 && fReused)
1342 {
1343 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1344 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1345 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1346 }
1347 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1348 pgmUnlock(pVM);
1349 return rc;
1350}
1351
1352# endif /* !IN_RING3 */
1353
1354/**
1355 * Access handler callback for PT write accesses.
1356 *
1357 * The handler cannot raise any faults; it's mainly for monitoring write access
1358 * to certain pages.
1359 *
1360 * @returns VINF_SUCCESS if the handler has carried out the operation.
1361 * @returns VINF_PGM_HANDLER_DO_DEFAULT if the caller should carry out the access operation.
1362 * @param pVM Pointer to the VM.
1363 * @param pVCpu The cross context CPU structure for the calling EMT.
1364 * @param GCPhys The physical address the guest is writing to.
1365 * @param pvPhys The HC mapping of that address.
1366 * @param pvBuf What the guest is reading/writing.
1367 * @param cbBuf How much it's reading/writing.
1368 * @param enmAccessType The access type.
1369 * @param enmOrigin Who is making the access.
1370 * @param pvUser User argument.
1371 */
1372PGM_ALL_CB2_DECL(int) pgmPoolAccessHandler(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1373 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1374{
1375 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1376 STAM_PROFILE_START(&pPool->StatMonitorR3, a);
1377 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1378 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1379 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1380
1381 NOREF(pvBuf); NOREF(enmAccessType);
1382
1383 /*
1384 * Make sure the pool page wasn't modified by a different CPU.
1385 */
1386 pgmLock(pVM);
1387 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1388 {
1389 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1390
1391 /* The max modification count before flushing depends on the context and page type. */
1392#ifdef IN_RING3
1393 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1394#else
1395 uint16_t cMaxModifications;
1396 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1397 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1398 cMaxModifications = 4;
1399 else
1400 cMaxModifications = 24;
1401# ifdef IN_RC
1402 cMaxModifications *= 2; /* traps are cheaper than exits. */
1403# endif
1404#endif
1405
1406 /*
1407 * We don't have to be very sophisticated about this since there are relatively few calls here.
1408 * However, we must try our best to detect any non-CPU accesses (disk / networking).
1409 */
1410 if ( ( pPage->cModifications < cMaxModifications
1411 || pgmPoolIsPageLocked(pPage) )
1412 && enmOrigin != PGMACCESSORIGIN_DEVICE
1413 && cbBuf <= 16)
1414 {
1415 /* Clear the shadow entry. */
1416 if (!pPage->cModifications++)
1417 pgmPoolMonitorModifiedInsert(pPool, pPage);
1418
1419 if (cbBuf <= 8)
1420 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1421 else
1422 {
1423 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1424 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1425 }
1426 }
1427 else
1428 {
1429 /* ASSUME that VERR_PGM_POOL_CLEARED can be ignored here and that FFs will deal with it in due time. */
1430 pgmPoolMonitorChainFlush(pPool, pPage);
1431 }
1432
1433 STAM_PROFILE_STOP_EX(&pPool->StatMonitorR3, &pPool->StatMonitorR3FlushPage, a);
1434 }
1435 else
1436 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1437 pgmUnlock(pVM);
1438 return VINF_PGM_HANDLER_DO_DEFAULT;
1439}
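
The threshold ladder above (and the similar one in the #PF handler) can be summarized as a small helper; a sketch only, the authoritative values are the #ifdef blocks above:

#include <stdbool.h>
#include <stdint.h>

/* Sketch of the cMaxModifications selection in pgmPoolAccessHandler above:
   ring-3 tolerates the most, PAE shadow page tables the least, and raw-mode
   contexts double the ring-0 values because traps are cheaper than exits. */
static uint16_t poolMaxModificationsSketch(bool fRing3, bool fRawMode, bool fPaePageTable)
{
    if (fRing3)
        return 96;
    uint16_t cMax = fPaePageTable ? 4 : 24;
    if (fRawMode)
        cMax *= 2;
    return cMax;
}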
1440
1441
1442# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1443
1444# if defined(VBOX_STRICT) && !defined(IN_RING3)
1445
1446/**
1447 * Check references to guest physical memory in a PAE / PAE page table.
1448 *
1449 * @param pPool The pool.
1450 * @param pPage The page.
1451 * @param pShwPT The shadow page table (mapping of the page).
1452 * @param pGstPT The guest page table.
1453 */
1454static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1455{
1456 unsigned cErrors = 0;
1457 int LastRc = -1; /* initialized to shut up gcc */
1458 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1459 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1460 PVM pVM = pPool->CTX_SUFF(pVM);
1461
1462#ifdef VBOX_STRICT
1463 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1464 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1465#endif
1466 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1467 {
1468 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1469 {
1470 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1471 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1472 if ( rc != VINF_SUCCESS
1473 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1474 {
1475 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1476 LastPTE = i;
1477 LastRc = rc;
1478 LastHCPhys = HCPhys;
1479 cErrors++;
1480
1481 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1482 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1483 AssertRC(rc);
1484
1485 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1486 {
1487 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1488
1489 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1490 {
1491 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1492
1493 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1494 {
1495 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1496 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1497 {
1498 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1499 }
1500 }
1501
1502 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1503 }
1504 }
1505 }
1506 }
1507 }
1508 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1509}
1510
1511
1512/**
1513 * Check references to guest physical memory in a PAE / 32-bit page table.
1514 *
1515 * @param pPool The pool.
1516 * @param pPage The page.
1517 * @param pShwPT The shadow page table (mapping of the page).
1518 * @param pGstPT The guest page table.
1519 */
1520static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1521{
1522 unsigned cErrors = 0;
1523 int LastRc = -1; /* initialized to shut up gcc */
1524 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1525 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1526 PVM pVM = pPool->CTX_SUFF(pVM);
1527
1528#ifdef VBOX_STRICT
1529 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1530 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1531#endif
1532 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1533 {
1534 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1535 {
1536 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1537 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1538 if ( rc != VINF_SUCCESS
1539 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1540 {
1541 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1542 LastPTE = i;
1543 LastRc = rc;
1544 LastHCPhys = HCPhys;
1545 cErrors++;
1546
1547 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1548 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1549 AssertRC(rc);
1550
1551 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1552 {
1553 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1554
1555 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1556 {
1557 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1558
1559 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1560 {
1561 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1562 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1563 {
1564 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1565 }
1566 }
1567
1568 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1569 }
1570 }
1571 }
1572 }
1573 }
1574 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1575}
1576
1577# endif /* VBOX_STRICT && !IN_RING3 */
1578
1579/**
1580 * Clear references to guest physical memory in a PAE / PAE page table.
1581 *
1582 * @returns nr of changed PTEs
1583 * @param pPool The pool.
1584 * @param pPage The page.
1585 * @param pShwPT The shadow page table (mapping of the page).
1586 * @param pGstPT The guest page table.
1587 * @param pOldGstPT The old cached guest page table.
1588 * @param fAllowRemoval   Whether to bail out and flag the page for removal as soon as an invalid guest PTE is encountered.
1589 * @param pfFlush Flush reused page table (out)
1590 */
1591DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1592 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1593{
1594 unsigned cChanged = 0;
1595
1596#ifdef VBOX_STRICT
1597 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1598 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1599#endif
1600 *pfFlush = false;
1601
1602 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1603 {
1604 /* Check the new value written by the guest. If present and with a bogus physical address, then
1605 * it's fairly safe to assume the guest is reusing the PT.
1606 */
1607 if ( fAllowRemoval
1608 && pGstPT->a[i].n.u1Present)
1609 {
1610 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1611 {
1612 *pfFlush = true;
1613 return ++cChanged;
1614 }
1615 }
1616 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1617 {
1618 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1619 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1620 {
1621#ifdef VBOX_STRICT
1622                RTHCPHYS HCPhys = NIL_RTHCPHYS;
1623 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1624 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1625#endif
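                /* The physical address matches the cached guest PTE; if the attribute bits below
                 * also match and the shadow entry is not more writable than the guest entry,
                 * the shadow PTE can be kept as-is (no flush needed). */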
1626 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1627 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1628 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1629 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1630
1631 if ( uHostAttr == uGuestAttr
1632 && fHostRW <= fGuestRW)
1633 continue;
1634 }
1635 cChanged++;
1636 /* Something was changed, so flush it. */
1637            Log4(("pgmPoolTrackFlushPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1638 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1639 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1640 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1641 }
1642 }
1643 return cChanged;
1644}
1645
1646
1647/**
1648 * Clear references to guest physical memory in a PAE / 32-bit page table.
1649 *
1650 * @returns nr of changed PTEs
1651 * @param pPool The pool.
1652 * @param pPage The page.
1653 * @param pShwPT The shadow page table (mapping of the page).
1654 * @param pGstPT The guest page table.
1655 * @param pOldGstPT The old cached guest page table.
1656 * @param fAllowRemoval   Whether to bail out and flag the page for removal as soon as an invalid guest PTE is encountered.
1657 * @param pfFlush Flush reused page table (out)
1658 */
1659DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1660 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1661{
1662 unsigned cChanged = 0;
1663
1664#ifdef VBOX_STRICT
1665 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1666 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1667#endif
1668 *pfFlush = false;
1669
1670 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1671 {
1672 /* Check the new value written by the guest. If present and with a bogus physical address, then
1673 * it's fairly safe to assume the guest is reusing the PT.
1674 */
1675 if ( fAllowRemoval
1676 && pGstPT->a[i].n.u1Present)
1677 {
1678 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1679 {
1680 *pfFlush = true;
1681 return ++cChanged;
1682 }
1683 }
1684 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1685 {
1686 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1687 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1688 {
1689#ifdef VBOX_STRICT
1690                RTHCPHYS HCPhys = NIL_RTHCPHYS;
1691 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1692 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1693#endif
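                /* The physical address matches the cached guest PTE; if the attribute bits below
                 * also match and the shadow entry is not more writable than the guest entry,
                 * the shadow PTE can be kept as-is (no flush needed). */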
1694 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1695 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1696 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1697 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1698
1699 if ( uHostAttr == uGuestAttr
1700 && fHostRW <= fGuestRW)
1701 continue;
1702 }
1703 cChanged++;
1704 /* Something was changed, so flush it. */
1705            Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1706 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1707 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1708 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1709 }
1710 }
1711 return cChanged;
1712}
1713
1714
1715/**
1716 * Flush a dirty page
1717 *
1718 * @param pVM Pointer to the VM.
1719 * @param pPool The pool.
1720 * @param idxSlot Dirty array slot index
1721 * @param fAllowRemoval Allow a reused page table to be removed
1722 */
1723static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1724{
1725 PPGMPOOLPAGE pPage;
1726 unsigned idxPage;
1727
1728 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1729 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1730 return;
1731
1732 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1733 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1734 pPage = &pPool->aPages[idxPage];
1735 Assert(pPage->idx == idxPage);
1736 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1737
1738 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1739 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1740
1741#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1742 PVMCPU pVCpu = VMMGetCpu(pVM);
1743 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1744#endif
1745
1746 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1747 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1748 Assert(rc == VINF_SUCCESS);
1749 pPage->fDirty = false;
1750
1751#ifdef VBOX_STRICT
1752 uint64_t fFlags = 0;
1753 RTHCPHYS HCPhys;
1754 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1755 AssertMsg( ( rc == VINF_SUCCESS
1756 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1757 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1758 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1759 || rc == VERR_PAGE_NOT_PRESENT,
1760 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1761#endif
1762
1763 /* Flush those PTEs that have changed. */
1764 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1765 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1766 void *pvGst;
1767 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1768 bool fFlush;
1769 unsigned cChanges;
1770
1771 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1772 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1773 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1774 else
1775 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1776 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1777
1778 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1779 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1780 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1781 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1782
1783 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1784 Assert(pPage->cModifications);
1785 if (cChanges < 4)
1786 pPage->cModifications = 1; /* must use > 0 here */
1787 else
1788 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1789
1790 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
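    /* If the dirty set was full, the slot we have just flushed becomes the next free slot. */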
1791 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1792 pPool->idxFreeDirtyPage = idxSlot;
1793
1794 pPool->cDirtyPages--;
1795 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1796 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1797 if (fFlush)
1798 {
1799 Assert(fAllowRemoval);
1800 Log(("Flush reused page table!\n"));
1801 pgmPoolFlushPage(pPool, pPage);
1802 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1803 }
1804 else
1805 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1806
1807#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1808 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1809#endif
1810}
1811
1812
1813# ifndef IN_RING3
1814/**
1815 * Add a new dirty page
1816 *
1817 * @param pVM Pointer to the VM.
1818 * @param pPool The pool.
1819 * @param pPage The page.
1820 */
1821void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1822{
1823 unsigned idxFree;
1824
1825 PGM_LOCK_ASSERT_OWNER(pVM);
1826 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1827 Assert(!pPage->fDirty);
1828
1829 idxFree = pPool->idxFreeDirtyPage;
1830 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1831 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1832
1833 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1834 {
1835 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1836 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1837 }
1838 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1839 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1840
1841 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1842
1843 /*
1844 * Make a copy of the guest page table as we require valid GCPhys addresses
1845 * when removing references to physical pages.
1846 * (The HCPhys linear lookup is *extremely* expensive!)
1847 */
1848 void *pvGst;
1849 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1850 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1851# ifdef VBOX_STRICT
1852 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1853 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1854 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1855 else
1856 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1857 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1858# endif
1859 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1860
1861 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1862 pPage->fDirty = true;
1863 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1864 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1865 pPool->cDirtyPages++;
1866
1867 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
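    /* The next slot may already be occupied; if so, scan the ring for a free slot
     * (one must exist unless the dirty set is completely full). */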
1868 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1869 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1870 {
1871 unsigned i;
1872 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1873 {
1874 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1875 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1876 {
1877 pPool->idxFreeDirtyPage = idxFree;
1878 break;
1879 }
1880 }
1881 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1882 }
1883
1884 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1885
1886 /*
1887 * Clear all references to this shadow table. See @bugref{7298}.
1888 */
1889 pgmPoolTrackClearPageUsers(pPool, pPage);
1890}
1891# endif /* !IN_RING3 */
1892
1893
1894/**
1895 * Check if the specified page is dirty (not write monitored)
1896 *
1897 * @returns true if dirty, false if not.
1898 * @param pVM Pointer to the VM.
1899 * @param GCPhys Guest physical address
1900 */
1901bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1902{
1903 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1904 PGM_LOCK_ASSERT_OWNER(pVM);
1905 if (!pPool->cDirtyPages)
1906 return false;
1907
1908 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1909
1910 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1911 {
1912 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1913 {
1914 PPGMPOOLPAGE pPage;
1915 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1916
1917 pPage = &pPool->aPages[idxPage];
1918 if (pPage->GCPhys == GCPhys)
1919 return true;
1920 }
1921 }
1922 return false;
1923}
1924
1925
1926/**
1927 * Reset all dirty pages by reinstating page monitoring.
1928 *
1929 * @param pVM Pointer to the VM.
1930 */
1931void pgmPoolResetDirtyPages(PVM pVM)
1932{
1933 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1934 PGM_LOCK_ASSERT_OWNER(pVM);
1935 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1936
1937 if (!pPool->cDirtyPages)
1938 return;
1939
1940 Log(("pgmPoolResetDirtyPages\n"));
1941 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1942 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1943
1944 pPool->idxFreeDirtyPage = 0;
1945 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1946 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1947 {
1948 unsigned i;
1949 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1950 {
1951 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1952 {
1953 pPool->idxFreeDirtyPage = i;
1954 break;
1955 }
1956 }
1957 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1958 }
1959
1960 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1961 return;
1962}
1963
1964
1965/**
1966 * Invalidate the PT entry for the specified page
1967 *
1968 * @param pVM Pointer to the VM.
1969 * @param GCPtrPage Guest page to invalidate
1970 */
1971void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1972{
1973 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1974 PGM_LOCK_ASSERT_OWNER(pVM);
1975 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1976
1977 if (!pPool->cDirtyPages)
1978 return;
1979
1980 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
1981 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1982 {
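        /* Note: the loop body is currently empty, so no per-slot invalidation is performed here. */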
1983 }
1984}
1985
1986
1987/**
1988 * Flush the specified page table from the dirty page set (if present), reinstating page monitoring.
1989 *
1990 * @param pVM Pointer to the VM.
1991 * @param GCPhysPT Physical address of the page table
1992 */
1993void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1994{
1995 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1996 PGM_LOCK_ASSERT_OWNER(pVM);
1997 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1998 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1999
2000 if (!pPool->cDirtyPages)
2001 return;
2002
2003 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2004
2005 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2006 {
2007 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
2008 {
2009 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
2010
2011 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2012 if (pPage->GCPhys == GCPhysPT)
2013 {
2014 idxDirtyPage = i;
2015 break;
2016 }
2017 }
2018 }
2019
2020 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2021 {
2022 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2023 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2024 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
2025 {
2026 unsigned i;
2027 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2028 {
2029 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2030 {
2031 pPool->idxFreeDirtyPage = i;
2032 break;
2033 }
2034 }
2035 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2036 }
2037 }
2038}
2039
2040# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2041
2042/**
2043 * Inserts a page into the GCPhys hash table.
2044 *
2045 * @param pPool The pool.
2046 * @param pPage The page.
2047 */
2048DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2049{
2050 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2051 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2052 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2053 pPage->iNext = pPool->aiHash[iHash];
2054 pPool->aiHash[iHash] = pPage->idx;
2055}
2056
2057
2058/**
2059 * Removes a page from the GCPhys hash table.
2060 *
2061 * @param pPool The pool.
2062 * @param pPage The page.
2063 */
2064DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2065{
2066 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2067 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2068 if (pPool->aiHash[iHash] == pPage->idx)
2069 pPool->aiHash[iHash] = pPage->iNext;
2070 else
2071 {
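        /* The page is not the head of the hash chain; walk the chain, find its predecessor and unlink it. */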
2072 uint16_t iPrev = pPool->aiHash[iHash];
2073 for (;;)
2074 {
2075 const int16_t i = pPool->aPages[iPrev].iNext;
2076 if (i == pPage->idx)
2077 {
2078 pPool->aPages[iPrev].iNext = pPage->iNext;
2079 break;
2080 }
2081 if (i == NIL_PGMPOOL_IDX)
2082 {
2083 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2084 break;
2085 }
2086 iPrev = i;
2087 }
2088 }
2089 pPage->iNext = NIL_PGMPOOL_IDX;
2090}
2091
2092
2093/**
2094 * Frees up one cache page.
2095 *
2096 * @returns VBox status code.
2097 * @retval VINF_SUCCESS on success.
2098 * @param pPool The pool.
2099 * @param iUser The user index.
2100 */
2101static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2102{
2103#ifndef IN_RC
2104 const PVM pVM = pPool->CTX_SUFF(pVM);
2105#endif
2106    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
2107 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2108
2109 /*
2110 * Select one page from the tail of the age list.
2111 */
2112 PPGMPOOLPAGE pPage;
2113 for (unsigned iLoop = 0; ; iLoop++)
2114 {
2115 uint16_t iToFree = pPool->iAgeTail;
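        /* Never pick the page that serves as the caller's user (parent) table; take the next oldest page instead. */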
2116 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2117 iToFree = pPool->aPages[iToFree].iAgePrev;
2118/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2119 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2120 {
2121 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2122 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2123 {
2124 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2125 continue;
2126 iToFree = i;
2127 break;
2128 }
2129 }
2130*/
2131 Assert(iToFree != iUser);
2132 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2133 pPage = &pPool->aPages[iToFree];
2134
2135 /*
2136 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2137 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2138 */
2139 if ( !pgmPoolIsPageLocked(pPage)
2140 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2141 break;
2142 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2143 pgmPoolCacheUsed(pPool, pPage);
2144 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2145 }
2146
2147 /*
2148 * Found a usable page, flush it and return.
2149 */
2150 int rc = pgmPoolFlushPage(pPool, pPage);
2151 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2152    /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2153 if (rc == VINF_SUCCESS)
2154 PGM_INVL_ALL_VCPU_TLBS(pVM);
2155 return rc;
2156}
2157
2158
2159/**
2160 * Checks if a kind mismatch is really a page being reused
2161 * or if it's just a normal remapping.
2162 *
2163 * @returns true if reused and the cached page (enmKind1) should be flushed
2164 * @returns false if not reused.
2165 * @param enmKind1 The kind of the cached page.
2166 * @param enmKind2 The kind of the requested page.
2167 */
2168static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2169{
2170 switch (enmKind1)
2171 {
2172 /*
2173 * Never reuse them. There is no remapping in non-paging mode.
2174 */
2175 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2176 case PGMPOOLKIND_32BIT_PD_PHYS:
2177 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2178 case PGMPOOLKIND_PAE_PD_PHYS:
2179 case PGMPOOLKIND_PAE_PDPT_PHYS:
2180 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2181 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2182 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2183 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2184 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2185 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2186 return false;
2187
2188 /*
2189 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2190 */
2191 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2192 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2193 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2194 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2195 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2196 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2197 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2198 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2199 case PGMPOOLKIND_32BIT_PD:
2200 case PGMPOOLKIND_PAE_PDPT:
2201 switch (enmKind2)
2202 {
2203 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2204 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2205 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2206 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2207 case PGMPOOLKIND_64BIT_PML4:
2208 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2209 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2210 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2211 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2212 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2213 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2214 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2215 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2216 return true;
2217 default:
2218 return false;
2219 }
2220
2221 /*
2222 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2223 */
2224 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2225 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2226 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2227 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2228 case PGMPOOLKIND_64BIT_PML4:
2229 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2230 switch (enmKind2)
2231 {
2232 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2233 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2234 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2235 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2236 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2237 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2238 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2239 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2240 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2241 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2242 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2243 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2244 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2245 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2246 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2247 return true;
2248 default:
2249 return false;
2250 }
2251
2252 /*
2253 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2254 */
2255 case PGMPOOLKIND_ROOT_NESTED:
2256 return false;
2257
2258 default:
2259 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2260 }
2261}
2262
2263
2264/**
2265 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2266 *
2267 * @returns VBox status code.
2268 * @retval VINF_PGM_CACHED_PAGE on success.
2269 * @retval VERR_FILE_NOT_FOUND if not found.
2270 * @param pPool The pool.
2271 * @param GCPhys The GC physical address of the page we're gonna shadow.
2272 * @param enmKind The kind of mapping.
2273 * @param enmAccess Access type for the mapping (only relevant for big pages)
2274 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2275 * @param iUser The shadow page pool index of the user table. This is
2276 * NIL_PGMPOOL_IDX for root pages.
2277 * @param iUserTable The index into the user table (shadowed). Ignored if
2278 * root page
2279 * @param ppPage Where to store the pointer to the page.
2280 */
2281static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2282 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2283{
2284 /*
2285 * Look up the GCPhys in the hash.
2286 */
2287 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2288 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2289 if (i != NIL_PGMPOOL_IDX)
2290 {
2291 do
2292 {
2293 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2294 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2295 if (pPage->GCPhys == GCPhys)
2296 {
2297 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2298 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2299 && pPage->fA20Enabled == fA20Enabled)
2300 {
2301 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2302 * doesn't flush it in case there are no more free use records.
2303 */
2304 pgmPoolCacheUsed(pPool, pPage);
2305
2306 int rc = VINF_SUCCESS;
2307 if (iUser != NIL_PGMPOOL_IDX)
2308 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2309 if (RT_SUCCESS(rc))
2310 {
2311 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2312 *ppPage = pPage;
2313 if (pPage->cModifications)
2314 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2315 STAM_COUNTER_INC(&pPool->StatCacheHits);
2316 return VINF_PGM_CACHED_PAGE;
2317 }
2318 return rc;
2319 }
2320
2321 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2322 {
2323 /*
2324 * The kind is different. In some cases we should now flush the page
2325 * as it has been reused, but in most cases this is normal remapping
2326 * of PDs as PT or big pages using the GCPhys field in a slightly
2327 * different way than the other kinds.
2328 */
2329 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2330 {
2331 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2332 pgmPoolFlushPage(pPool, pPage);
2333 break;
2334 }
2335 }
2336 }
2337
2338 /* next */
2339 i = pPage->iNext;
2340 } while (i != NIL_PGMPOOL_IDX);
2341 }
2342
2343 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2344 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2345 return VERR_FILE_NOT_FOUND;
2346}
2347
2348
2349/**
2350 * Inserts a page into the cache.
2351 *
2352 * @param pPool The pool.
2353 * @param pPage The cached page.
2354 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2355 */
2356static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2357{
2358 /*
2359 * Insert into the GCPhys hash if the page is fit for that.
2360 */
2361 Assert(!pPage->fCached);
2362 if (fCanBeCached)
2363 {
2364 pPage->fCached = true;
2365 pgmPoolHashInsert(pPool, pPage);
2366 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2367 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2368 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2369 }
2370 else
2371 {
2372 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2373 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2374 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2375 }
2376
2377 /*
2378 * Insert at the head of the age list.
2379 */
2380 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2381 pPage->iAgeNext = pPool->iAgeHead;
2382 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2383 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2384 else
2385 pPool->iAgeTail = pPage->idx;
2386 pPool->iAgeHead = pPage->idx;
2387}
2388
2389
2390/**
2391 * Flushes a cached page.
2392 *
2393 * @param pPool The pool.
2394 * @param pPage The cached page.
2395 */
2396static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2397{
2398 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2399
2400 /*
2401 * Remove the page from the hash.
2402 */
2403 if (pPage->fCached)
2404 {
2405 pPage->fCached = false;
2406 pgmPoolHashRemove(pPool, pPage);
2407 }
2408 else
2409 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2410
2411 /*
2412 * Remove it from the age list.
2413 */
2414 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2415 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2416 else
2417 pPool->iAgeTail = pPage->iAgePrev;
2418 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2419 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2420 else
2421 pPool->iAgeHead = pPage->iAgeNext;
2422 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2423 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2424}
2425
2426
2427/**
2428 * Looks for pages sharing the monitor.
2429 *
2430 * @returns Pointer to the head page.
2431 * @returns NULL if not found.
2432 * @param pPool The Pool
2433 * @param pNewPage The page which is going to be monitored.
2434 */
2435static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2436{
2437 /*
2438 * Look up the GCPhys in the hash.
2439 */
2440 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2441 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2442 if (i == NIL_PGMPOOL_IDX)
2443 return NULL;
2444 do
2445 {
2446 PPGMPOOLPAGE pPage = &pPool->aPages[i];
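        /* A pool page whose GCPhys lies within the same guest page (and which isn't the page
         * being inserted) may already be monitoring that guest page. */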
2447 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2448 && pPage != pNewPage)
2449 {
2450 switch (pPage->enmKind)
2451 {
2452 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2453 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2454 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2455 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2456 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2457 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2458 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2459 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2460 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2461 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2462 case PGMPOOLKIND_64BIT_PML4:
2463 case PGMPOOLKIND_32BIT_PD:
2464 case PGMPOOLKIND_PAE_PDPT:
2465 {
2466 /* find the head */
2467 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2468 {
2469 Assert(pPage->iMonitoredPrev != pPage->idx);
2470 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2471 }
2472 return pPage;
2473 }
2474
2475 /* ignore, no monitoring. */
2476 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2477 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2478 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2479 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2480 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2481 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2482 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2483 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2484 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2485 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2486 case PGMPOOLKIND_ROOT_NESTED:
2487 case PGMPOOLKIND_PAE_PD_PHYS:
2488 case PGMPOOLKIND_PAE_PDPT_PHYS:
2489 case PGMPOOLKIND_32BIT_PD_PHYS:
2490 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2491 break;
2492 default:
2493 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2494 }
2495 }
2496
2497 /* next */
2498 i = pPage->iNext;
2499 } while (i != NIL_PGMPOOL_IDX);
2500 return NULL;
2501}
2502
2503
2504/**
2505 * Enables write monitoring of a guest page.
2506 *
2507 * @returns VBox status code.
2508 * @retval VINF_SUCCESS on success.
2509 * @param pPool The pool.
2510 * @param pPage The cached page.
2511 */
2512static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2513{
2514 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2515
2516 /*
2517 * Filter out the relevant kinds.
2518 */
2519 switch (pPage->enmKind)
2520 {
2521 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2522 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2523 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2524 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2525 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2526 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2527 case PGMPOOLKIND_64BIT_PML4:
2528 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2529 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2530 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2531 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2532 case PGMPOOLKIND_32BIT_PD:
2533 case PGMPOOLKIND_PAE_PDPT:
2534 break;
2535
2536 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2537 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2538 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2539 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2540 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2541 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2542 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2543 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2544 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2545 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2546 case PGMPOOLKIND_ROOT_NESTED:
2547 /* Nothing to monitor here. */
2548 return VINF_SUCCESS;
2549
2550 case PGMPOOLKIND_32BIT_PD_PHYS:
2551 case PGMPOOLKIND_PAE_PDPT_PHYS:
2552 case PGMPOOLKIND_PAE_PD_PHYS:
2553 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2554 /* Nothing to monitor here. */
2555 return VINF_SUCCESS;
2556 default:
2557 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2558 }
2559
2560 /*
2561 * Install handler.
2562 */
2563 int rc;
2564 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2565 if (pPageHead)
2566 {
2567 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2568 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2569
2570#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2571 if (pPageHead->fDirty)
2572 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2573#endif
2574
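        /* Link the new page into the existing monitoring chain right after the head;
         * the head keeps the physical access handler registration. */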
2575 pPage->iMonitoredPrev = pPageHead->idx;
2576 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2577 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2578 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2579 pPageHead->iMonitoredNext = pPage->idx;
2580 rc = VINF_SUCCESS;
2581 }
2582 else
2583 {
2584 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2585 PVM pVM = pPool->CTX_SUFF(pVM);
2586 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2587 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2588 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2589 NIL_RTR3PTR /*pszDesc*/);
2590 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2591 * the heap size should suffice. */
2592 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2593 PVMCPU pVCpu = VMMGetCpu(pVM);
2594 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2595 }
2596 pPage->fMonitored = true;
2597 return rc;
2598}
2599
2600
2601/**
2602 * Disables write monitoring of a guest page.
2603 *
2604 * @returns VBox status code.
2605 * @retval VINF_SUCCESS on success.
2606 * @param pPool The pool.
2607 * @param pPage The cached page.
2608 */
2609static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2610{
2611 /*
2612 * Filter out the relevant kinds.
2613 */
2614 switch (pPage->enmKind)
2615 {
2616 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2617 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2618 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2619 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2620 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2621 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2622 case PGMPOOLKIND_64BIT_PML4:
2623 case PGMPOOLKIND_32BIT_PD:
2624 case PGMPOOLKIND_PAE_PDPT:
2625 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2626 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2627 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2628 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2629 break;
2630
2631 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2632 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2633 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2634 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2635 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2636 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2637 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2638 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2639 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2640 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2641 case PGMPOOLKIND_ROOT_NESTED:
2642 case PGMPOOLKIND_PAE_PD_PHYS:
2643 case PGMPOOLKIND_PAE_PDPT_PHYS:
2644 case PGMPOOLKIND_32BIT_PD_PHYS:
2645 /* Nothing to monitor here. */
2646 Assert(!pPage->fMonitored);
2647 return VINF_SUCCESS;
2648
2649 default:
2650 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2651 }
2652 Assert(pPage->fMonitored);
2653
2654 /*
2655 * Remove the page from the monitored list or uninstall it if last.
2656 */
2657 const PVM pVM = pPool->CTX_SUFF(pVM);
2658 int rc;
2659 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2660 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2661 {
2662 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2663 {
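            /* This page is the head of the monitoring chain: promote the next page to head
             * and re-point the access handler's user arguments at it. */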
2664 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2665 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2666 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, MMHyperCCToR3(pVM, pNewHead),
2667 MMHyperCCToR0(pVM, pNewHead), MMHyperCCToRC(pVM, pNewHead));
2668
2669 AssertFatalRCSuccess(rc);
2670 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2671 }
2672 else
2673 {
2674 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2675 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2676 {
2677 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2678 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2679 }
2680 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2681 rc = VINF_SUCCESS;
2682 }
2683 }
2684 else
2685 {
2686 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2687 AssertFatalRC(rc);
2688 PVMCPU pVCpu = VMMGetCpu(pVM);
2689 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2690 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2691 }
2692 pPage->fMonitored = false;
2693
2694 /*
2695 * Remove it from the list of modified pages (if in it).
2696 */
2697 pgmPoolMonitorModifiedRemove(pPool, pPage);
2698
2699 return rc;
2700}
2701
2702
2703/**
2704 * Inserts the page into the list of modified pages.
2705 *
2706 * @param pPool The pool.
2707 * @param pPage The page.
2708 */
2709void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2710{
2711 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2712 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2713 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2714 && pPool->iModifiedHead != pPage->idx,
2715 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2716 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2717 pPool->iModifiedHead, pPool->cModifiedPages));
2718
2719 pPage->iModifiedNext = pPool->iModifiedHead;
2720 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2721 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2722 pPool->iModifiedHead = pPage->idx;
2723 pPool->cModifiedPages++;
2724#ifdef VBOX_WITH_STATISTICS
2725 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2726 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2727#endif
2728}
2729
2730
2731/**
2732 * Removes the page from the list of modified pages and resets the
2733 * modification counter.
2734 *
2735 * @param pPool The pool.
2736 * @param pPage The page which is believed to be in the list of modified pages.
2737 */
2738static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2739{
2740 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2741 if (pPool->iModifiedHead == pPage->idx)
2742 {
2743 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2744 pPool->iModifiedHead = pPage->iModifiedNext;
2745 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2746 {
2747 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2748 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2749 }
2750 pPool->cModifiedPages--;
2751 }
2752 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2753 {
2754 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2755 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2756 {
2757 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2758 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2759 }
2760 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2761 pPool->cModifiedPages--;
2762 }
2763 else
2764 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2765 pPage->cModifications = 0;
2766}
2767
2768
2769/**
2770 * Zaps the list of modified pages, resetting their modification counters in the process.
2771 *
2772 * @param pVM Pointer to the VM.
2773 */
2774static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2775{
2776 pgmLock(pVM);
2777 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2778 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2779
2780 unsigned cPages = 0; NOREF(cPages);
2781
2782#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2783 pgmPoolResetDirtyPages(pVM);
2784#endif
2785
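    /* Detach the whole modified list and walk it, resetting each page's list links and modification counter. */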
2786 uint16_t idx = pPool->iModifiedHead;
2787 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2788 while (idx != NIL_PGMPOOL_IDX)
2789 {
2790 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2791 idx = pPage->iModifiedNext;
2792 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2793 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2794 pPage->cModifications = 0;
2795 Assert(++cPages);
2796 }
2797 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2798 pPool->cModifiedPages = 0;
2799 pgmUnlock(pVM);
2800}
2801
2802
2803/**
2804 * Handle SyncCR3 pool tasks
2805 *
2806 * @returns VBox status code.
2807 * @retval VINF_SUCCESS on success.
2808 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2809 * @param pVCpu Pointer to the VMCPU.
2810 * @remark Should only be used when monitoring is available, thus placed in
2811 * the PGMPOOL_WITH_MONITORING #ifdef.
2812 */
2813int pgmPoolSyncCR3(PVMCPU pVCpu)
2814{
2815 PVM pVM = pVCpu->CTX_SUFF(pVM);
2816 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2817
2818 /*
2819 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2820 * Occasionally we will have to clear all the shadow page tables because we wanted
2821 * to monitor a page which was mapped by too many shadowed page tables. This operation is
2822 * sometimes referred to as a 'lightweight flush'.
2823 */
2824# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2825 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2826 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2827# else /* !IN_RING3 */
2828 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2829 {
2830 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2831 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2832
2833 /* Make sure all other VCPUs return to ring 3. */
2834 if (pVM->cCpus > 1)
2835 {
2836 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2837 PGM_INVL_ALL_VCPU_TLBS(pVM);
2838 }
2839 return VINF_PGM_SYNC_CR3;
2840 }
2841# endif /* !IN_RING3 */
2842 else
2843 {
2844 pgmPoolMonitorModifiedClearAll(pVM);
2845
2846 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2847 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2848 {
2849 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2850 return pgmPoolSyncCR3(pVCpu);
2851 }
2852 }
2853 return VINF_SUCCESS;
2854}
2855
2856
2857/**
2858 * Frees up at least one user entry.
2859 *
2860 * @returns VBox status code.
2861 * @retval VINF_SUCCESS on success.
2862 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2863 * @param pPool The pool.
2864 * @param iUser The user index.
2865 */
2866static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2867{
2868 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2869 /*
2870 * Just free cached pages in a braindead fashion.
2871 */
2872 /** @todo walk the age list backwards and free the first with usage. */
2873 int rc = VINF_SUCCESS;
2874 do
2875 {
2876 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2877 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2878 rc = rc2;
2879 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2880 return rc;
2881}
2882
2883
2884/**
2885 * Inserts a page into the cache.
2886 *
2887 * This will create a user node for the page, insert it into the GCPhys
2888 * hash, and insert it into the age list.
2889 *
2890 * @returns VBox status code.
2891 * @retval VINF_SUCCESS if successfully added.
2892 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2893 * @param pPool The pool.
2894 * @param pPage The cached page.
2895 * @param GCPhys The GC physical address of the page we're gonna shadow.
2896 * @param iUser The user index.
2897 * @param iUserTable The user table index.
2898 */
2899DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2900{
2901 int rc = VINF_SUCCESS;
2902 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2903
2904 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2905
2906 if (iUser != NIL_PGMPOOL_IDX)
2907 {
2908#ifdef VBOX_STRICT
2909 /*
2910 * Check that the entry doesn't already exist.
2911 */
2912 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2913 {
2914 uint16_t i = pPage->iUserHead;
2915 do
2916 {
2917 Assert(i < pPool->cMaxUsers);
2918 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2919 i = paUsers[i].iNext;
2920 } while (i != NIL_PGMPOOL_USER_INDEX);
2921 }
2922#endif
2923
2924 /*
2925 * Find a free user node.
2926 */
2927 uint16_t i = pPool->iUserFreeHead;
2928 if (i == NIL_PGMPOOL_USER_INDEX)
2929 {
2930 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2931 if (RT_FAILURE(rc))
2932 return rc;
2933 i = pPool->iUserFreeHead;
2934 }
2935
2936 /*
2937 * Unlink the user node from the free list,
2938 * initialize and insert it into the user list.
2939 */
2940 pPool->iUserFreeHead = paUsers[i].iNext;
2941 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2942 paUsers[i].iUser = iUser;
2943 paUsers[i].iUserTable = iUserTable;
2944 pPage->iUserHead = i;
2945 }
2946 else
2947 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2948
2949
2950 /*
2951 * Insert into cache and enable monitoring of the guest page if enabled.
2952 *
2953 * Until we implement caching of all levels, including the CR3 one, we'll
2954 * have to make sure we don't try to monitor & cache any recursive reuse of
2955 * a monitored CR3 page. Because all Windows versions are doing this we'll
2956 * have to be able to do combined access monitoring, CR3 + PT and
2957 * PD + PT (guest PAE).
2958 *
2959 * Update:
2960 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2961 */
2962 const bool fCanBeMonitored = true;
2963 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2964 if (fCanBeMonitored)
2965 {
2966 rc = pgmPoolMonitorInsert(pPool, pPage);
2967 AssertRC(rc);
2968 }
2969 return rc;
2970}
2971
2972
2973/**
2974 * Adds a user reference to a page.
2975 *
2976 * This will move the page to the head of the age list.
2977 *
2978 * @returns VBox status code.
2979 * @retval VINF_SUCCESS if successfully added.
2980 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2981 * @param pPool The pool.
2982 * @param pPage The cached page.
2983 * @param iUser The user index.
2984 * @param iUserTable The user table.
2985 */
2986static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2987{
2988    Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2989 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2990 Assert(iUser != NIL_PGMPOOL_IDX);
2991
2992# ifdef VBOX_STRICT
2993 /*
2994 * Check that the entry doesn't already exist. We only allow multiple
2995 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2996 */
2997 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2998 {
2999 uint16_t i = pPage->iUserHead;
3000 do
3001 {
3002 Assert(i < pPool->cMaxUsers);
3003 /** @todo this assertion looks odd... Shouldn't it be && here? */
3004 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3005 i = paUsers[i].iNext;
3006 } while (i != NIL_PGMPOOL_USER_INDEX);
3007 }
3008# endif
3009
3010 /*
3011 * Allocate a user node.
3012 */
3013 uint16_t i = pPool->iUserFreeHead;
3014 if (i == NIL_PGMPOOL_USER_INDEX)
3015 {
3016 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3017 if (RT_FAILURE(rc))
3018 return rc;
3019 i = pPool->iUserFreeHead;
3020 }
3021 pPool->iUserFreeHead = paUsers[i].iNext;
3022
3023 /*
3024 * Initialize the user node and insert it.
3025 */
3026 paUsers[i].iNext = pPage->iUserHead;
3027 paUsers[i].iUser = iUser;
3028 paUsers[i].iUserTable = iUserTable;
3029 pPage->iUserHead = i;
3030
3031# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3032 if (pPage->fDirty)
3033 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3034# endif
3035
3036 /*
3037 * Tell the cache to update its replacement stats for this page.
3038 */
3039 pgmPoolCacheUsed(pPool, pPage);
3040 return VINF_SUCCESS;
3041}
3042
3043
3044/**
3045 * Frees a user record associated with a page.
3046 *
3047 * This does not clear the entry in the user table, it simply returns the
3048 * user record to the chain of free records.
3049 *
3050 * @param pPool The pool.
3051 * @param pPage The shadow page.
3052 * @param iUser The shadow page pool index of the user table.
3053 * @param iUserTable The index into the user table (shadowed).
3054 *
3055 * @remarks Don't call this for root pages.
3056 */
3057static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3058{
3059 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3060 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3061 Assert(iUser != NIL_PGMPOOL_IDX);
3062
3063 /*
3064 * Unlink and free the specified user entry.
3065 */
3066
3067 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3068 uint16_t i = pPage->iUserHead;
3069 if ( i != NIL_PGMPOOL_USER_INDEX
3070 && paUsers[i].iUser == iUser
3071 && paUsers[i].iUserTable == iUserTable)
3072 {
3073 pPage->iUserHead = paUsers[i].iNext;
3074
3075 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3076 paUsers[i].iNext = pPool->iUserFreeHead;
3077 pPool->iUserFreeHead = i;
3078 return;
3079 }
3080
3081 /* General: Linear search. */
3082 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3083 while (i != NIL_PGMPOOL_USER_INDEX)
3084 {
3085 if ( paUsers[i].iUser == iUser
3086 && paUsers[i].iUserTable == iUserTable)
3087 {
3088 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3089 paUsers[iPrev].iNext = paUsers[i].iNext;
3090 else
3091 pPage->iUserHead = paUsers[i].iNext;
3092
3093 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3094 paUsers[i].iNext = pPool->iUserFreeHead;
3095 pPool->iUserFreeHead = i;
3096 return;
3097 }
3098 iPrev = i;
3099 i = paUsers[i].iNext;
3100 }
3101
3102 /* Fatal: didn't find it */
3103 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3104 iUser, iUserTable, pPage->GCPhys));
3105}
3106
3107
3108/**
3109 * Gets the entry size of a shadow table.
3110 *
3111 * @param enmKind The kind of page.
3112 *
3113 * @returns The size of the entry in bytes. That is, 4 or 8.
3114 * @returns If the kind is not for a table, an assertion is raised and 0 is
3115 * returned.
3116 */
3117DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3118{
3119 switch (enmKind)
3120 {
3121 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3122 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3123 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3124 case PGMPOOLKIND_32BIT_PD:
3125 case PGMPOOLKIND_32BIT_PD_PHYS:
3126 return 4;
3127
3128 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3129 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3130 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3131 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3132 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3133 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3134 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3135 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3136 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3137 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3138 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3139 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3140 case PGMPOOLKIND_64BIT_PML4:
3141 case PGMPOOLKIND_PAE_PDPT:
3142 case PGMPOOLKIND_ROOT_NESTED:
3143 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3144 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3145 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3146 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3147 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3148 case PGMPOOLKIND_PAE_PD_PHYS:
3149 case PGMPOOLKIND_PAE_PDPT_PHYS:
3150 return 8;
3151
3152 default:
3153 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3154 }
3155}
3156
3157
3158/**
3159 * Gets the entry size of a guest table.
3160 *
3161 * @param enmKind The kind of page.
3162 *
3163 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3164 * @returns If the kind is not for a table, an assertion is raised and 0 is
3165 * returned.
3166 */
3167DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3168{
3169 switch (enmKind)
3170 {
3171 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3172 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3173 case PGMPOOLKIND_32BIT_PD:
3174 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3175 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3176 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3177 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3178 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3179 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3180 return 4;
3181
3182 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3183 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3184 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3185 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3186 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3187 case PGMPOOLKIND_64BIT_PML4:
3188 case PGMPOOLKIND_PAE_PDPT:
3189 return 8;
3190
3191 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3192 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3193 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3194 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3195 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3196 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3197 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3198 case PGMPOOLKIND_ROOT_NESTED:
3199 case PGMPOOLKIND_PAE_PD_PHYS:
3200 case PGMPOOLKIND_PAE_PDPT_PHYS:
3201 case PGMPOOLKIND_32BIT_PD_PHYS:
3202 /** @todo can we return 0? (nobody is calling this...) */
3203 AssertFailed();
3204 return 0;
3205
3206 default:
3207 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3208 }
3209}
3210
3211
3212/**
3213 * Checks one shadow page table entry for a mapping of a physical page.
3214 *
3215 * @returns true if the entry was kept (only modified), false if it was removed.
3216 *
3217 * @param pVM Pointer to the VM.
3218 * @param pPhysPage The guest page in question.
3219 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3220 * @param iShw The shadow page table.
3221 * @param   iPte        Page table entry index (NIL_PGMPOOL_PHYSEXT_IDX_PTE is not allowed).
3222 */
3223static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3224{
3225 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3226 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3227 bool fRet = false;
3228
3229 /*
3230 * Assert sanity.
3231 */
3232 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3233 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3234 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3235
3236 /*
3237 * Then, clear the actual mappings to the page in the shadow PT.
3238 */
3239 switch (pPage->enmKind)
3240 {
3241 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3242 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3243 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3244 {
3245 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3246 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3247 uint32_t u32AndMask = 0;
3248 uint32_t u32OrMask = 0;
3249
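            /*
             * When the caller allows entries to be kept (fFlushPTEs == false), the masks are set
             * up so that only the RW bit changes: no/disabled handler -> restore write access,
             * active write handler -> strip write access.  If both masks stay zero the entry is
             * cleared outright and the present counters are decremented below.
             */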
3250 if (!fFlushPTEs)
3251 {
3252 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3253 {
3254                case PGM_PAGE_HNDL_PHYS_STATE_NONE:        /* No handler installed. */
3255                case PGM_PAGE_HNDL_PHYS_STATE_DISABLED:    /* Monitoring is temporarily disabled. */
3256 u32OrMask = X86_PTE_RW;
3257 u32AndMask = UINT32_MAX;
3258 fRet = true;
3259 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3260 break;
3261
3262                case PGM_PAGE_HNDL_PHYS_STATE_WRITE:       /* Write access is monitored. */
3263 u32OrMask = 0;
3264 u32AndMask = ~X86_PTE_RW;
3265 fRet = true;
3266 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3267 break;
3268 default:
3269 /* (shouldn't be here, will assert below) */
3270 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3271 break;
3272 }
3273 }
3274 else
3275 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3276
3277 /* Update the counter if we're removing references. */
3278 if (!u32AndMask)
3279 {
3280 Assert(pPage->cPresent);
3281 Assert(pPool->cPresent);
3282 pPage->cPresent--;
3283 pPool->cPresent--;
3284 }
3285
3286 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3287 {
3288 X86PTE Pte;
3289
3290 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3291 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3292 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3293 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3294
3295 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3296 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3297 return fRet;
3298 }
3299#ifdef LOG_ENABLED
3300 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3301 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3302 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3303 {
3304 Log(("i=%d cFound=%d\n", i, ++cFound));
3305 }
3306#endif
3307 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3308 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3309 break;
3310 }
3311
3312 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3313 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3314 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3315 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3316 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3317 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3318 {
3319 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3320 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3321 uint64_t u64OrMask = 0;
3322 uint64_t u64AndMask = 0;
3323
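            /* Same RW-bit handling as in the 32-bit case above, only with 64-bit PTEs. */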
3324 if (!fFlushPTEs)
3325 {
3326 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3327 {
3328 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3329 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3330 u64OrMask = X86_PTE_RW;
3331 u64AndMask = UINT64_MAX;
3332 fRet = true;
3333 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3334 break;
3335
3336 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3337 u64OrMask = 0;
3338 u64AndMask = ~(uint64_t)X86_PTE_RW;
3339 fRet = true;
3340 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3341 break;
3342
3343 default:
3344 /* (shouldn't be here, will assert below) */
3345 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3346 break;
3347 }
3348 }
3349 else
3350 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3351
3352 /* Update the counter if we're removing references. */
3353 if (!u64AndMask)
3354 {
3355 Assert(pPage->cPresent);
3356 Assert(pPool->cPresent);
3357 pPage->cPresent--;
3358 pPool->cPresent--;
3359 }
3360
3361 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3362 {
3363 X86PTEPAE Pte;
3364
3365 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3366 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3367 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3368 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3369
3370 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3371 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3372 return fRet;
3373 }
3374#ifdef LOG_ENABLED
3375 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3376 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3377 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3378 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3379 Log(("i=%d cFound=%d\n", i, ++cFound));
3380#endif
3381 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3382 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3383 break;
3384 }
3385
3386#ifdef PGM_WITH_LARGE_PAGES
3387 /* Large page case only. */
3388 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3389 {
3390 Assert(pVM->pgm.s.fNestedPaging);
3391
3392 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3393 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3394
3395 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3396 {
3397 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3398 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3399 pPD->a[iPte].u = 0;
3400 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3401
3402 /* Update the counter as we're removing references. */
3403 Assert(pPage->cPresent);
3404 Assert(pPool->cPresent);
3405 pPage->cPresent--;
3406 pPool->cPresent--;
3407
3408 return fRet;
3409 }
3410# ifdef LOG_ENABLED
3411 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3412 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3413 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3414 Log(("i=%d cFound=%d\n", i, ++cFound));
3415# endif
3416 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3417 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3418 break;
3419 }
3420
3421 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3422 case PGMPOOLKIND_PAE_PD_PHYS:
3423 {
3424 Assert(pVM->pgm.s.fNestedPaging);
3425
3426 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3427 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3428
3429 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3430 {
3431 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3432 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3433 pPD->a[iPte].u = 0;
3434 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3435
3436 /* Update the counter as we're removing references. */
3437 Assert(pPage->cPresent);
3438 Assert(pPool->cPresent);
3439 pPage->cPresent--;
3440 pPool->cPresent--;
3441 return fRet;
3442 }
3443# ifdef LOG_ENABLED
3444 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3445 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3446 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3447 Log(("i=%d cFound=%d\n", i, ++cFound));
3448# endif
3449 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3450 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3451 break;
3452 }
3453#endif /* PGM_WITH_LARGE_PAGES */
3454
3455 default:
3456 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3457 }
3458
3459 /* not reached. */
3460#ifndef _MSC_VER
3461 return fRet;
3462#endif
3463}
3464
3465
3466/**
3467 * Scans one shadow page table for mappings of a physical page.
3468 *
3469 * @param pVM Pointer to the VM.
3470 * @param pPhysPage The guest page in question.
3471 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3472 * @param iShw The shadow page table.
3473 */
3474static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3475{
3476 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3477
3478    /* We should only come here when there's only one reference to this physical page. */
3479 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3480
3481 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3482 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3483 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3484 if (!fKeptPTEs)
3485 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3486 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3487}
3488
3489
3490/**
3491 * Flushes a list of shadow page tables mapping the same physical page.
3492 *
3493 * @param pVM Pointer to the VM.
3494 * @param pPhysPage The guest page in question.
3495 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3496 * @param iPhysExt The physical cross reference extent list to flush.
3497 */
3498static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3499{
3500 PGM_LOCK_ASSERT_OWNER(pVM);
3501 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3502 bool fKeepList = false;
3503
3504 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3505    Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3506
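    /*
     * Walk the whole extent chain, clearing every recorded (shadow table, PTE) pair.  If no
     * entry had to be kept, the chain is returned to the free list and the tracking data is
     * reset below.
     */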
3507 const uint16_t iPhysExtStart = iPhysExt;
3508 PPGMPOOLPHYSEXT pPhysExt;
3509 do
3510 {
3511 Assert(iPhysExt < pPool->cMaxPhysExts);
3512 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3513 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3514 {
3515 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3516 {
3517 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3518 if (!fKeptPTEs)
3519 {
3520 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3521 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3522 }
3523 else
3524 fKeepList = true;
3525 }
3526 }
3527 /* next */
3528 iPhysExt = pPhysExt->iNext;
3529 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3530
3531 if (!fKeepList)
3532 {
3533 /* insert the list into the free list and clear the ram range entry. */
3534 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3535 pPool->iPhysExtFreeHead = iPhysExtStart;
3536 /* Invalidate the tracking data. */
3537 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3538 }
3539
3540 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3541}
3542
3543
3544/**
3545 * Flushes all shadow page table mappings of the given guest page.
3546 *
3547 * This is typically called when the host page backing the guest one has been
3548 * replaced or when the page protection was changed due to a guest access
3549 * caught by the monitoring.
3550 *
3551 * @returns VBox status code.
3552 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3553 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3554 * pool cleaning. FF and sync flags are set.
3555 *
3556 * @param pVM Pointer to the VM.
3557 * @param GCPhysPage GC physical address of the page in question
3558 * @param pPhysPage The guest page in question.
3559 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3560 * @param   pfFlushTLBs This is set to @a true if the shadow TLBs should be
3561 *                      flushed; it is NOT touched if this isn't necessary.
3562 *                      The caller MUST initialize this to @a false.
3563 */
3564int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3565{
3566 PVMCPU pVCpu = VMMGetCpu(pVM);
3567 pgmLock(pVM);
3568 int rc = VINF_SUCCESS;
3569
3570#ifdef PGM_WITH_LARGE_PAGES
3571 /* Is this page part of a large page? */
3572 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3573 {
3574 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3575 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3576
3577 /* Fetch the large page base. */
3578 PPGMPAGE pLargePage;
3579 if (GCPhysBase != GCPhysPage)
3580 {
3581 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3582 AssertFatal(pLargePage);
3583 }
3584 else
3585 pLargePage = pPhysPage;
3586
3587 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3588
3589 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3590 {
3591 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3592 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3593 pVM->pgm.s.cLargePagesDisabled++;
3594
3595            /* Update the base as *only* that one has a reference and there's only one PDE to clear. */
3596 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3597
3598 *pfFlushTLBs = true;
3599 pgmUnlock(pVM);
3600 return rc;
3601 }
3602 }
3603#else
3604 NOREF(GCPhysPage);
3605#endif /* PGM_WITH_LARGE_PAGES */
3606
3607 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3608 if (u16)
3609 {
3610 /*
3611 * The zero page is currently screwing up the tracking and we'll
3612 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3613 * is defined, zero pages won't normally be mapped. Some kind of solution
3614 * will be needed for this problem of course, but it will have to wait...
3615 */
3616 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3617 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3618 rc = VINF_PGM_GCPHYS_ALIASED;
3619 else
3620 {
3621# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3622 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3623 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3624 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3625# endif
3626
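            /*
             * The 16-bit tracking word encodes either a single reference (cRefs == 1, idx is the
             * shadow page table index), an extent list (cRefs == PGMPOOL_TD_CREFS_PHYSEXT, idx is
             * the list head), or the overflowed marker which forces the slow scan of the pool.
             */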
3627 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3628 {
3629 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3630 pgmPoolTrackFlushGCPhysPT(pVM,
3631 pPhysPage,
3632 fFlushPTEs,
3633 PGMPOOL_TD_GET_IDX(u16));
3634 }
3635 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3636 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3637 else
3638 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3639 *pfFlushTLBs = true;
3640
3641# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3642 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3643# endif
3644 }
3645 }
3646
3647 if (rc == VINF_PGM_GCPHYS_ALIASED)
3648 {
3649 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3650 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3651 rc = VINF_PGM_SYNC_CR3;
3652 }
3653 pgmUnlock(pVM);
3654 return rc;
3655}
3656
3657
3658/**
3659 * Scans all shadow page tables for mappings of a physical page.
3660 *
3661 * This may be slow, but it's most likely more efficient than cleaning
3662 * out the entire page pool / cache.
3663 *
3664 * @returns VBox status code.
3665 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3666 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3667 * a page pool cleaning.
3668 *
3669 * @param pVM Pointer to the VM.
3670 * @param pPhysPage The guest page in question.
3671 */
3672int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3673{
3674 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3675 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3676 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3677 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3678
3679 /*
3680 * There is a limit to what makes sense.
3681 */
3682 if ( pPool->cPresent > 1024
3683 && pVM->cCpus == 1)
3684 {
3685 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3686 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3687 return VINF_PGM_GCPHYS_ALIASED;
3688 }
3689
3690 /*
3691 * Iterate all the pages until we've encountered all that in use.
3692 * This is a simple but not quite optimal solution.
3693 */
3694 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3695 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3696 unsigned cLeft = pPool->cUsedPages;
3697 unsigned iPage = pPool->cCurPages;
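    /*
     * cLeft bounds the walk to the pages still accounted as used, while the per-page copy of
     * cPresent lets us stop scanning a table once all its present entries have been visited.
     */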
3698 while (--iPage >= PGMPOOL_IDX_FIRST)
3699 {
3700 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3701 if ( pPage->GCPhys != NIL_RTGCPHYS
3702 && pPage->cPresent)
3703 {
3704 switch (pPage->enmKind)
3705 {
3706 /*
3707 * We only care about shadow page tables.
3708 */
3709 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3710 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3711 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3712 {
3713 unsigned cPresent = pPage->cPresent;
3714 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3715 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3716 if (pPT->a[i].n.u1Present)
3717 {
3718 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3719 {
3720 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3721 pPT->a[i].u = 0;
3722
3723 /* Update the counter as we're removing references. */
3724 Assert(pPage->cPresent);
3725 Assert(pPool->cPresent);
3726 pPage->cPresent--;
3727 pPool->cPresent--;
3728 }
3729 if (!--cPresent)
3730 break;
3731 }
3732 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3733 break;
3734 }
3735
3736 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3737 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3738 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3739 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3740 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3741 {
3742 unsigned cPresent = pPage->cPresent;
3743 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3744 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3745 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3746 {
3747 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3748 {
3749 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3750 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3751
3752 /* Update the counter as we're removing references. */
3753 Assert(pPage->cPresent);
3754 Assert(pPool->cPresent);
3755 pPage->cPresent--;
3756 pPool->cPresent--;
3757 }
3758 if (!--cPresent)
3759 break;
3760 }
3761 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3762 break;
3763 }
3764#ifndef IN_RC
3765 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3766 {
3767 unsigned cPresent = pPage->cPresent;
3768 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3769 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3770 if (pPT->a[i].n.u1Present)
3771 {
3772 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3773 {
3774 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3775 pPT->a[i].u = 0;
3776
3777 /* Update the counter as we're removing references. */
3778 Assert(pPage->cPresent);
3779 Assert(pPool->cPresent);
3780 pPage->cPresent--;
3781 pPool->cPresent--;
3782 }
3783 if (!--cPresent)
3784 break;
3785 }
3786 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3787 break;
3788 }
3789#endif
3790 }
3791 if (!--cLeft)
3792 break;
3793 }
3794 }
3795
3796 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3797 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3798
3799 /*
3800 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3801 */
3802 if (pPool->cPresent > 1024)
3803 {
3804 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3805 return VINF_PGM_GCPHYS_ALIASED;
3806 }
3807
3808 return VINF_SUCCESS;
3809}
3810
3811
3812/**
3813 * Clears the user entry in a user table.
3814 *
3815 * This is used to remove all references to a page when flushing it.
3816 */
3817static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3818{
3819 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3820 Assert(pUser->iUser < pPool->cCurPages);
3821 uint32_t iUserTable = pUser->iUserTable;
3822
3823 /*
3824 * Map the user page. Ignore references made by fictitious pages.
3825 */
3826 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3827 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3828 union
3829 {
3830 uint64_t *pau64;
3831 uint32_t *pau32;
3832 } u;
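    /* The union gives 32-bit and 64-bit views of the user (parent) table so the entry can be cleared with the right width below. */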
3833 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3834 {
3835 Assert(!pUserPage->pvPageR3);
3836 return;
3837 }
3838 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3839
3840
3841 /* Safety precaution in case we change the paging for other modes too in the future. */
3842 Assert(!pgmPoolIsPageLocked(pPage));
3843
3844#ifdef VBOX_STRICT
3845 /*
3846 * Some sanity checks.
3847 */
3848 switch (pUserPage->enmKind)
3849 {
3850 case PGMPOOLKIND_32BIT_PD:
3851 case PGMPOOLKIND_32BIT_PD_PHYS:
3852 Assert(iUserTable < X86_PG_ENTRIES);
3853 break;
3854 case PGMPOOLKIND_PAE_PDPT:
3855 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3856 case PGMPOOLKIND_PAE_PDPT_PHYS:
3857 Assert(iUserTable < 4);
3858 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3859 break;
3860 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3861 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3862 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3863 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3864 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3865 case PGMPOOLKIND_PAE_PD_PHYS:
3866 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3867 break;
3868 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3869 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3870 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3871 break;
3872 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3873 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3874 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3875 break;
3876 case PGMPOOLKIND_64BIT_PML4:
3877 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3878 /* GCPhys >> PAGE_SHIFT is the index here */
3879 break;
3880 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3881 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3882 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3883 break;
3884
3885 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3886 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3887 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3888 break;
3889
3890 case PGMPOOLKIND_ROOT_NESTED:
3891 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3892 break;
3893
3894 default:
3895 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3896 break;
3897 }
3898#endif /* VBOX_STRICT */
3899
3900 /*
3901 * Clear the entry in the user page.
3902 */
3903 switch (pUserPage->enmKind)
3904 {
3905 /* 32-bit entries */
3906 case PGMPOOLKIND_32BIT_PD:
3907 case PGMPOOLKIND_32BIT_PD_PHYS:
3908 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3909 break;
3910
3911 /* 64-bit entries */
3912 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3913 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3914 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3915 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3916 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3917#ifdef IN_RC
3918 /*
3919         * In 32-bit PAE mode we *must* invalidate the TLB when changing a
3920 * PDPT entry; the CPU fetches them only during cr3 load, so any
3921 * non-present PDPT will continue to cause page faults.
3922 */
3923 ASMReloadCR3();
3924 /* no break */
3925#endif
3926 case PGMPOOLKIND_PAE_PD_PHYS:
3927 case PGMPOOLKIND_PAE_PDPT_PHYS:
3928 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3929 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3930 case PGMPOOLKIND_64BIT_PML4:
3931 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3932 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3933 case PGMPOOLKIND_PAE_PDPT:
3934 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3935 case PGMPOOLKIND_ROOT_NESTED:
3936 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3937 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3938 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3939 break;
3940
3941 default:
3942 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3943 }
3944 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3945}
3946
3947
3948/**
3949 * Clears all users of a page.
3950 */
3951static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3952{
3953 /*
3954 * Free all the user records.
3955 */
3956 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3957
3958 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3959 uint16_t i = pPage->iUserHead;
3960 while (i != NIL_PGMPOOL_USER_INDEX)
3961 {
3962        /* Clear the entry in the user table. */
3963 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3964
3965 /* Free it. */
3966 const uint16_t iNext = paUsers[i].iNext;
3967 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3968 paUsers[i].iNext = pPool->iUserFreeHead;
3969 pPool->iUserFreeHead = i;
3970
3971 /* Next. */
3972 i = iNext;
3973 }
3974 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3975}
3976
3977
3978/**
3979 * Allocates a new physical cross reference extent.
3980 *
3981 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3982 * @param pVM Pointer to the VM.
3983 * @param piPhysExt Where to store the phys ext index.
3984 */
3985PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3986{
3987 PGM_LOCK_ASSERT_OWNER(pVM);
3988 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3989 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3990 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3991 {
3992 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3993 return NULL;
3994 }
3995 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3996 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3997 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3998 *piPhysExt = iPhysExt;
3999 return pPhysExt;
4000}
4001
4002
4003/**
4004 * Frees a physical cross reference extent.
4005 *
4006 * @param pVM Pointer to the VM.
4007 * @param iPhysExt The extent to free.
4008 */
4009void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
4010{
4011 PGM_LOCK_ASSERT_OWNER(pVM);
4012 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4013 Assert(iPhysExt < pPool->cMaxPhysExts);
4014 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4015 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4016 {
4017 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4018 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4019 }
4020 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4021 pPool->iPhysExtFreeHead = iPhysExt;
4022}
4023
4024
4025/**
4026 * Frees a list of physical cross reference extents.
4027 *
4028 * @param   pVM         Pointer to the VM.
4029 * @param   iPhysExt    The index of the head of the list to free.
4030 */
4031void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
4032{
4033 PGM_LOCK_ASSERT_OWNER(pVM);
4034 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4035
4036 const uint16_t iPhysExtStart = iPhysExt;
4037 PPGMPOOLPHYSEXT pPhysExt;
4038 do
4039 {
4040 Assert(iPhysExt < pPool->cMaxPhysExts);
4041 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4042 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4043 {
4044 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4045 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4046 }
4047
4048 /* next */
4049 iPhysExt = pPhysExt->iNext;
4050 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4051
4052 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4053 pPool->iPhysExtFreeHead = iPhysExtStart;
4054}
4055
4056
4057/**
4058 * Insert a reference into a list of physical cross reference extents.
4059 *
4060 * @returns The new tracking data for PGMPAGE.
4061 *
4062 * @param pVM Pointer to the VM.
4063 * @param iPhysExt The physical extent index of the list head.
4064 * @param iShwPT The shadow page table index.
4065 * @param iPte Page table entry
4066 *
4067 */
4068static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4069{
4070 PGM_LOCK_ASSERT_OWNER(pVM);
4071 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4072 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4073
4074 /*
4075 * Special common cases.
4076 */
4077 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4078 {
4079 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4080 paPhysExts[iPhysExt].apte[1] = iPte;
4081 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4082 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4083 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4084 }
4085 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4086 {
4087 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4088 paPhysExts[iPhysExt].apte[2] = iPte;
4089 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4090 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4091 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4092 }
4093 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4094
4095 /*
4096 * General treatment.
4097 */
4098 const uint16_t iPhysExtStart = iPhysExt;
4099 unsigned cMax = 15;
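    /*
     * Each extent holds three slots; we give up after walking 15 extents and mark the page as
     * PGMPOOL_TD_IDX_OVERFLOWED, returning its extent list to the free list.
     */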
4100 for (;;)
4101 {
4102 Assert(iPhysExt < pPool->cMaxPhysExts);
4103 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4104 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4105 {
4106 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4107 paPhysExts[iPhysExt].apte[i] = iPte;
4108 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4109 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4110 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4111 }
4112 if (!--cMax)
4113 {
4114 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4115 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4116 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4117 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4118 }
4119
4120 /* advance */
4121 iPhysExt = paPhysExts[iPhysExt].iNext;
4122 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4123 break;
4124 }
4125
4126 /*
4127 * Add another extent to the list.
4128 */
4129 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4130 if (!pNew)
4131 {
4132 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4133 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4134 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4135 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4136 }
4137 pNew->iNext = iPhysExtStart;
4138 pNew->aidx[0] = iShwPT;
4139 pNew->apte[0] = iPte;
4140 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4141 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4142}
4143
4144
4145/**
4146 * Add a reference to guest physical page where extents are in use.
4147 *
4148 * @returns The new tracking data for PGMPAGE.
4149 *
4150 * @param pVM Pointer to the VM.
4151 * @param pPhysPage Pointer to the aPages entry in the ram range.
4152 * @param u16 The ram range flags (top 16-bits).
4153 * @param iShwPT The shadow page table index.
4154 * @param iPte Page table entry
4155 */
4156uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4157{
4158 pgmLock(pVM);
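    /*
     * Rough sketch of the tracking word (u16) states handled below:
     *   cRefs == 1                        -> idx is the single referencing shadow page table
     *   cRefs == PGMPOOL_TD_CREFS_PHYSEXT -> idx is the head of a physical extent list
     *   idx  == PGMPOOL_TD_IDX_OVERFLOWED -> too many references to track individually
     */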
4159 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4160 {
4161 /*
4162 * Convert to extent list.
4163 */
4164 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4165 uint16_t iPhysExt;
4166 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4167 if (pPhysExt)
4168 {
4169 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4170 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4171 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4172 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4173 pPhysExt->aidx[1] = iShwPT;
4174 pPhysExt->apte[1] = iPte;
4175 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4176 }
4177 else
4178 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4179 }
4180 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4181 {
4182 /*
4183 * Insert into the extent list.
4184 */
4185 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4186 }
4187 else
4188 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4189 pgmUnlock(pVM);
4190 return u16;
4191}
4192
4193
4194/**
4195 * Clear references to guest physical memory.
4196 *
4197 * @param pPool The pool.
4198 * @param pPage The page.
4199 * @param pPhysPage Pointer to the aPages entry in the ram range.
4200 * @param iPte Shadow PTE index
4201 */
4202void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4203{
4204 PVM pVM = pPool->CTX_SUFF(pVM);
4205 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4206 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4207
4208 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4209 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4210 {
4211 pgmLock(pVM);
4212
4213 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4214 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4215 do
4216 {
4217 Assert(iPhysExt < pPool->cMaxPhysExts);
4218
4219 /*
4220 * Look for the shadow page and check if it's all freed.
4221 */
4222 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4223 {
4224 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4225 && paPhysExts[iPhysExt].apte[i] == iPte)
4226 {
4227 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4228 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4229
4230 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4231 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4232 {
4233 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4234 pgmUnlock(pVM);
4235 return;
4236 }
4237
4238 /* we can free the node. */
4239 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4240 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4241 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4242 {
4243 /* lonely node */
4244 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4245 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4246 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4247 }
4248 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4249 {
4250 /* head */
4251 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4252 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4253 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4254 }
4255 else
4256 {
4257 /* in list */
4258 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4259 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4260 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4261 }
4262 iPhysExt = iPhysExtNext;
4263 pgmUnlock(pVM);
4264 return;
4265 }
4266 }
4267
4268 /* next */
4269 iPhysExtPrev = iPhysExt;
4270 iPhysExt = paPhysExts[iPhysExt].iNext;
4271 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4272
4273 pgmUnlock(pVM);
4274 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4275 }
4276 else /* nothing to do */
4277 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4278}
4279
4280/**
4281 * Clear references to guest physical memory.
4282 *
4283 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4284 * physical address is assumed to be correct, so the linear search can be
4285 * skipped and we can assert at an earlier point.
4286 *
4287 * @param pPool The pool.
4288 * @param pPage The page.
4289 * @param HCPhys The host physical address corresponding to the guest page.
4290 * @param GCPhys The guest physical address corresponding to HCPhys.
4291 * @param iPte Shadow PTE index
4292 */
4293static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4294{
4295 /*
4296 * Lookup the page and check if it checks out before derefing it.
4297 */
4298 PVM pVM = pPool->CTX_SUFF(pVM);
4299 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4300 if (pPhysPage)
4301 {
4302 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4303#ifdef LOG_ENABLED
4304 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4305 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4306#endif
4307 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4308 {
4309 Assert(pPage->cPresent);
4310 Assert(pPool->cPresent);
4311 pPage->cPresent--;
4312 pPool->cPresent--;
4313 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4314 return;
4315 }
4316
4317 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4318 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4319 }
4320 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4321}
4322
4323
4324/**
4325 * Clear references to guest physical memory.
4326 *
4327 * @param pPool The pool.
4328 * @param pPage The page.
4329 * @param HCPhys The host physical address corresponding to the guest page.
4330 * @param   GCPhysHint  The guest physical address which may correspond to HCPhys.
4331 * @param iPte Shadow pte index
4332 */
4333void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4334{
4335 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4336
4337 /*
4338 * Try the hint first.
4339 */
4340 RTHCPHYS HCPhysHinted;
4341 PVM pVM = pPool->CTX_SUFF(pVM);
4342 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4343 if (pPhysPage)
4344 {
4345 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4346 Assert(HCPhysHinted);
4347 if (HCPhysHinted == HCPhys)
4348 {
4349 Assert(pPage->cPresent);
4350 Assert(pPool->cPresent);
4351 pPage->cPresent--;
4352 pPool->cPresent--;
4353 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4354 return;
4355 }
4356 }
4357 else
4358 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4359
4360 /*
4361 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4362 */
4363 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4364 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4365 while (pRam)
4366 {
4367 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4368 while (iPage-- > 0)
4369 {
4370 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4371 {
4372 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4373 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4374 Assert(pPage->cPresent);
4375 Assert(pPool->cPresent);
4376 pPage->cPresent--;
4377 pPool->cPresent--;
4378 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4379 return;
4380 }
4381 }
4382 pRam = pRam->CTX_SUFF(pNext);
4383 }
4384
4385 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4386}
4387
4388
4389/**
4390 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4391 *
4392 * @param pPool The pool.
4393 * @param pPage The page.
4394 * @param pShwPT The shadow page table (mapping of the page).
4395 * @param pGstPT The guest page table.
4396 */
4397DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4398{
4399 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
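    /* With the A20 gate disabled, bit 20 of the guest physical hint is masked off, mimicking the forced-zero A20 line. */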
4400 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4401 {
4402 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4403 if (pShwPT->a[i].n.u1Present)
4404 {
4405 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4406 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4407 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4408 if (!pPage->cPresent)
4409 break;
4410 }
4411 }
4412}
4413
4414
4415/**
4416 * Clear references to guest physical memory in a PAE / 32-bit page table.
4417 *
4418 * @param pPool The pool.
4419 * @param pPage The page.
4420 * @param pShwPT The shadow page table (mapping of the page).
4421 * @param pGstPT The guest page table (just a half one).
4422 */
4423DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4424{
4425 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4426 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4427 {
4428 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4429 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4430 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4431 {
4432 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4433 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4434 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4435 if (!pPage->cPresent)
4436 break;
4437 }
4438 }
4439}
4440
4441
4442/**
4443 * Clear references to guest physical memory in a PAE / PAE page table.
4444 *
4445 * @param pPool The pool.
4446 * @param pPage The page.
4447 * @param pShwPT The shadow page table (mapping of the page).
4448 * @param pGstPT The guest page table.
4449 */
4450DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4451{
4452 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4453 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4454 {
4455 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4456 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4457 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4458 {
4459 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4460 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4461 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4462 if (!pPage->cPresent)
4463 break;
4464 }
4465 }
4466}
4467
4468
4469/**
4470 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4471 *
4472 * @param pPool The pool.
4473 * @param pPage The page.
4474 * @param pShwPT The shadow page table (mapping of the page).
4475 */
4476DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4477{
4478 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4479 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4480 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4481 {
4482 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4483 if (pShwPT->a[i].n.u1Present)
4484 {
4485 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4486 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4487 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4488 if (!pPage->cPresent)
4489 break;
4490 }
4491 }
4492}
4493
4494
4495/**
4496 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4497 *
4498 * @param pPool The pool.
4499 * @param pPage The page.
4500 * @param pShwPT The shadow page table (mapping of the page).
4501 */
4502DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4503{
4504 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4505 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4506 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4507 {
4508 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4509 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4510 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4511 {
4512 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4513 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4514 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4515 if (!pPage->cPresent)
4516 break;
4517 }
4518 }
4519}
4520
4521
4522/**
4523 * Clear references to shadowed pages in an EPT page table.
4524 *
4525 * @param pPool The pool.
4526 * @param pPage The page.
4527 * @param   pShwPT      The shadow page table (mapping of the page).
4528 */
4529DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4530{
4531 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4532 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4533 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4534 {
4535 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4536 if (pShwPT->a[i].n.u1Present)
4537 {
4538 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4539 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4540 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4541 if (!pPage->cPresent)
4542 break;
4543 }
4544 }
4545}
4546
4547
4548/**
4549 * Clear references to shadowed pages in a 32-bit page directory.
4550 *
4551 * @param pPool The pool.
4552 * @param pPage The page.
4553 * @param pShwPD The shadow page directory (mapping of the page).
4554 */
4555DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4556{
4557 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4558 {
4559 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4560 if ( pShwPD->a[i].n.u1Present
4561 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4562 )
4563 {
4564 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4565 if (pSubPage)
4566 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4567 else
4568 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4569 }
4570 }
4571}
4572
4573
4574/**
4575 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4576 *
4577 * @param pPool The pool.
4578 * @param pPage The page.
4579 * @param pShwPD The shadow page directory (mapping of the page).
4580 */
4581DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4582{
4583 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4584 {
4585 if ( pShwPD->a[i].n.u1Present
4586 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4587 {
4588#ifdef PGM_WITH_LARGE_PAGES
4589 if (pShwPD->a[i].b.u1Size)
4590 {
4591 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4592 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4593 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4594 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4595 i);
4596 }
4597 else
4598#endif
4599 {
4600 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4601 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4602 if (pSubPage)
4603 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4604 else
4605 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4606 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4607 }
4608 }
4609 }
4610}
4611
4612
4613/**
4614 * Clear references to shadowed pages in a PAE page directory pointer table.
4615 *
4616 * @param pPool The pool.
4617 * @param pPage The page.
4618 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4619 */
4620DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4621{
4622 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4623 {
4624 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4625 if ( pShwPDPT->a[i].n.u1Present
4626 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4627 )
4628 {
4629 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4630 if (pSubPage)
4631 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4632 else
4633 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4634 }
4635 }
4636}
4637
4638
4639/**
4640 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4641 *
4642 * @param pPool The pool.
4643 * @param pPage The page.
4644 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4645 */
4646DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4647{
4648 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4649 {
4650 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4651 if (pShwPDPT->a[i].n.u1Present)
4652 {
4653 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4654 if (pSubPage)
4655 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4656 else
4657 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4658 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4659 }
4660 }
4661}
4662
4663
4664/**
4665 * Clear references to shadowed pages in a 64-bit level 4 page table.
4666 *
4667 * @param pPool The pool.
4668 * @param pPage The page.
4669 * @param   pShwPML4    The shadow PML4 table (mapping of the page).
4670 */
4671DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4672{
4673 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4674 {
4675 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4676 if (pShwPML4->a[i].n.u1Present)
4677 {
4678 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4679 if (pSubPage)
4680 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4681 else
4682 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4683 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4684 }
4685 }
4686}
4687
4688
4689/**
4690 * Clear references to shadowed pages in an EPT page directory.
4691 *
4692 * @param pPool The pool.
4693 * @param pPage The page.
4694 * @param pShwPD The shadow page directory (mapping of the page).
4695 */
4696DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4697{
4698 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4699 {
4700 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4701 if (pShwPD->a[i].n.u1Present)
4702 {
4703#ifdef PGM_WITH_LARGE_PAGES
4704 if (pShwPD->a[i].b.u1Size)
4705 {
4706 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4707 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4708 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4709 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4710 i);
4711 }
4712 else
4713#endif
4714 {
4715 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4716 if (pSubPage)
4717 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4718 else
4719 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4720 }
4721 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4722 }
4723 }
4724}
4725
4726
4727/**
4728 * Clear references to shadowed pages in an EPT page directory pointer table.
4729 *
4730 * @param pPool The pool.
4731 * @param pPage The page.
4732 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4733 */
4734DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4735{
4736 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4737 {
4738 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4739 if (pShwPDPT->a[i].n.u1Present)
4740 {
4741 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4742 if (pSubPage)
4743 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4744 else
4745 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4746 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4747 }
4748 }
4749}
4750
4751
4752/**
4753 * Clears all references made by this page.
4754 *
4755 * This includes other shadow pages and GC physical addresses.
4756 *
4757 * @param pPool The pool.
4758 * @param pPage The page.
4759 */
4760static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4761{
4762 /*
4763 * Map the shadow page and take action according to the page kind.
4764 */
4765 PVM pVM = pPool->CTX_SUFF(pVM);
4766 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
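    /*
     * Kinds shadowing a real guest table map the guest page as well, so each present shadow
     * PTE can be dereferenced against its guest physical hint; the *_PHYS and big-page kinds
     * derive the address from pPage->GCPhys, and directory-level kinds only drop the
     * references to their child pool pages.
     */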
4767 switch (pPage->enmKind)
4768 {
4769 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4770 {
4771 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4772 void *pvGst;
4773 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4774 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4775 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4776 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4777 break;
4778 }
4779
4780 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4781 {
4782 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4783 void *pvGst;
4784 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4785 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4786 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4787 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4788 break;
4789 }
4790
4791 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4792 {
4793 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4794 void *pvGst;
4795 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4796 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4797 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4798 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4799 break;
4800 }
4801
4802 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4803 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4804 {
4805 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4806 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4807 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4808 break;
4809 }
4810
4811 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4812 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4813 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4814 {
4815 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4816 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4817 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4818 break;
4819 }
4820
4821 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4822 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4823 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4824 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4825 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4826 case PGMPOOLKIND_PAE_PD_PHYS:
4827 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4828 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4829 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4830 break;
4831
4832 case PGMPOOLKIND_32BIT_PD_PHYS:
4833 case PGMPOOLKIND_32BIT_PD:
4834 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4835 break;
4836
4837 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4838 case PGMPOOLKIND_PAE_PDPT:
4839 case PGMPOOLKIND_PAE_PDPT_PHYS:
4840 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4841 break;
4842
4843 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4844 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4845 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4846 break;
4847
4848 case PGMPOOLKIND_64BIT_PML4:
4849 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4850 break;
4851
4852 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4853 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4854 break;
4855
4856 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4857 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4858 break;
4859
4860 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4861 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4862 break;
4863
4864 default:
4865 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4866 }
4867
4868    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4869 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4870 ASMMemZeroPage(pvShw);
4871 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4872 pPage->fZeroed = true;
4873 Assert(!pPage->cPresent);
4874 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4875}
4876
4877
4878/**
4879 * Flushes a pool page.
4880 *
4881 * This moves the page to the free list after removing all user references to it.
4882 *
4883 * @returns VBox status code.
4884 * @retval VINF_SUCCESS on success.
4885 * @param pPool The pool.
4886 * @param   pPage       The shadow page to flush.
4887 * @param   fFlush      Flush the TLBs when required (should only be false in very specific use cases!!)
4888 */
4889int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4890{
4891 PVM pVM = pPool->CTX_SUFF(pVM);
4892 bool fFlushRequired = false;
4893
4894 int rc = VINF_SUCCESS;
4895 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4896 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4897 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4898
4899 /*
4900 * Reject any attempts at flushing any of the special root pages (shall
4901 * not happen).
4902 */
4903 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4904 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4905 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4906 VINF_SUCCESS);
4907
4908 pgmLock(pVM);
4909
4910 /*
4911 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4912 */
4913 if (pgmPoolIsPageLocked(pPage))
4914 {
4915 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4916 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4917 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4918 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4919 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4920 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4921 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4922 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4923 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4924 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4925 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4926 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4927 pgmUnlock(pVM);
4928 return VINF_SUCCESS;
4929 }
4930
4931#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4932 /* Start a subset so we won't run out of mapping space. */
4933 PVMCPU pVCpu = VMMGetCpu(pVM);
4934 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4935#endif
4936
4937 /*
4938 * Mark the page as being in need of an ASMMemZeroPage().
4939 */
4940 pPage->fZeroed = false;
4941
4942#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4943 if (pPage->fDirty)
4944 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4945#endif
4946
4947 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4948 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4949 fFlushRequired = true;
4950
4951 /*
4952 * Clear the page.
4953 */
4954 pgmPoolTrackClearPageUsers(pPool, pPage);
4955 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4956 pgmPoolTrackDeref(pPool, pPage);
4957 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4958
4959 /*
4960 * Flush it from the cache.
4961 */
4962 pgmPoolCacheFlushPage(pPool, pPage);
4963
4964#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4965 /* Heavy stuff done. */
4966 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4967#endif
4968
4969 /*
4970     * Deregister the monitoring.
4971 */
4972 if (pPage->fMonitored)
4973 rc = pgmPoolMonitorFlush(pPool, pPage);
4974
4975 /*
4976 * Free the page.
4977 */
4978 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4979 pPage->iNext = pPool->iFreeHead;
4980 pPool->iFreeHead = pPage->idx;
4981 pPage->enmKind = PGMPOOLKIND_FREE;
4982 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4983 pPage->GCPhys = NIL_RTGCPHYS;
4984 pPage->fReusedFlushPending = false;
4985
4986 pPool->cUsedPages--;
4987
4988 /* Flush the TLBs of all VCPUs if required. */
4989 if ( fFlushRequired
4990 && fFlush)
4991 {
4992 PGM_INVL_ALL_VCPU_TLBS(pVM);
4993 }
4994
4995 pgmUnlock(pVM);
4996 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4997 return rc;
4998}
4999
5000
5001/**
5002 * Frees a usage of a pool page.
5003 *
5004 * The caller is responsible for updating the user table so that it no longer
5005 * references the shadow page.
5006 *
5007 * @param pPool The pool.
5008 * @param   pPage       The shadow page.
5009 * @param iUser The shadow page pool index of the user table.
5010 * NIL_PGMPOOL_IDX for root pages.
5011 * @param iUserTable The index into the user table (shadowed). Ignored if
5012 * root page.
5013 */
5014void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5015{
5016 PVM pVM = pPool->CTX_SUFF(pVM);
5017
5018 STAM_PROFILE_START(&pPool->StatFree, a);
5019 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5020 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5021 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5022
5023 pgmLock(pVM);
5024 if (iUser != NIL_PGMPOOL_IDX)
5025 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
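    /* Only non-cached pages are flushed here; cached pages are left in place so the pool cache can reuse them. */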
5026 if (!pPage->fCached)
5027 pgmPoolFlushPage(pPool, pPage);
5028 pgmUnlock(pVM);
5029 STAM_PROFILE_STOP(&pPool->StatFree, a);
5030}
5031
5032
5033/**
5034 * Frees up one or more pages, either by growing the pool or by evicting a cached page.
5035 *
5036 * @returns VBox status code.
5037 * @retval VINF_SUCCESS on success.
5038 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5039 *
5040 * @param pPool The pool.
5041 * @param enmKind Page table kind
5042 * @param iUser The user of the page.
5043 */
5044static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5045{
5046 PVM pVM = pPool->CTX_SUFF(pVM);
5047 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5048 NOREF(enmKind);
5049
5050 /*
5051     * If the pool isn't fully grown yet, expand it.
5052 */
5053 if ( pPool->cCurPages < pPool->cMaxPages
5054#if defined(IN_RC)
5055 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
5056 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5057 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
5058#endif
5059 )
5060 {
5061 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5062#ifdef IN_RING3
5063 int rc = PGMR3PoolGrow(pVM);
5064#else
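        /* Growing the pool requires ring-3, so from ring-0/raw-mode context we defer it via the call-ring-3 mechanism. */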
5065 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
5066#endif
5067 if (RT_FAILURE(rc))
5068 return rc;
5069 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5070 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5071 return VINF_SUCCESS;
5072 }
5073
5074 /*
5075 * Free one cached page.
5076 */
5077 return pgmPoolCacheFreeOne(pPool, iUser);
5078}
5079
5080
5081/**
5082 * Allocates a page from the pool.
5083 *
5084 * This page may actually be a cached page and not in need of any processing
5085 * on the caller's part.
5086 *
5087 * @returns VBox status code.
5088 * @retval VINF_SUCCESS if a NEW page was allocated.
5089 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5090 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5091 *
5092 * @param pVM Pointer to the VM.
5093 * @param   GCPhys      The GC physical address of the page we're going to shadow.
5094 * For 4MB and 2MB PD entries, it's the first address the
5095 * shadow PT is covering.
5096 * @param enmKind The kind of mapping.
5097 * @param enmAccess Access type for the mapping (only relevant for big pages)
5098 * @param fA20Enabled Whether the A20 gate is enabled or not.
5099 * @param iUser The shadow page pool index of the user table. Root
5100 * pages should pass NIL_PGMPOOL_IDX.
5101 * @param iUserTable The index into the user table (shadowed). Ignored for
5102 * root pages (iUser == NIL_PGMPOOL_IDX).
5103 * @param fLockPage Lock the page
5104 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5105 */
5106int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5107 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5108{
5109 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5110 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5111 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5112 *ppPage = NULL;
5113 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5114 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5115 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5116
5117 pgmLock(pVM);
5118
5119 if (pPool->fCacheEnabled)
5120 {
5121 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5122 if (RT_SUCCESS(rc2))
5123 {
5124 if (fLockPage)
5125 pgmPoolLockPage(pPool, *ppPage);
5126 pgmUnlock(pVM);
5127 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5128 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5129 return rc2;
5130 }
5131 }
5132
5133 /*
5134 * Allocate a new one.
5135 */
5136 int rc = VINF_SUCCESS;
5137 uint16_t iNew = pPool->iFreeHead;
5138 if (iNew == NIL_PGMPOOL_IDX)
5139 {
5140 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5141 if (RT_FAILURE(rc))
5142 {
5143 pgmUnlock(pVM);
5144 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5145 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5146 return rc;
5147 }
5148 iNew = pPool->iFreeHead;
5149 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5150 }
5151
5152 /* unlink the free head */
5153 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5154 pPool->iFreeHead = pPage->iNext;
5155 pPage->iNext = NIL_PGMPOOL_IDX;
5156
5157 /*
5158 * Initialize it.
5159 */
5160 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5161 pPage->enmKind = enmKind;
5162 pPage->enmAccess = enmAccess;
5163 pPage->GCPhys = GCPhys;
5164 pPage->fA20Enabled = fA20Enabled;
5165 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5166 pPage->fMonitored = false;
5167 pPage->fCached = false;
5168 pPage->fDirty = false;
5169 pPage->fReusedFlushPending = false;
5170 pPage->cModifications = 0;
5171 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5172 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5173 pPage->cPresent = 0;
5174 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5175 pPage->idxDirtyEntry = 0;
5176 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5177 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5178 pPage->cLastAccessHandler = 0;
5179 pPage->cLocked = 0;
5180# ifdef VBOX_STRICT
5181 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5182# endif
5183
5184 /*
5185 * Insert into the tracking and cache. If this fails, free the page.
5186 */
5187 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5188 if (RT_FAILURE(rc3))
5189 {
5190 pPool->cUsedPages--;
5191 pPage->enmKind = PGMPOOLKIND_FREE;
5192 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5193 pPage->GCPhys = NIL_RTGCPHYS;
5194 pPage->iNext = pPool->iFreeHead;
5195 pPool->iFreeHead = pPage->idx;
5196 pgmUnlock(pVM);
5197 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5198 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5199 return rc3;
5200 }
5201
5202 /*
5203 * Commit the allocation, clear the page and return.
5204 */
5205#ifdef VBOX_WITH_STATISTICS
5206 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5207 pPool->cUsedPagesHigh = pPool->cUsedPages;
5208#endif
5209
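    /* Only clear the page if it wasn't already zeroed when it was freed (see the fZeroed handling above). */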
5210 if (!pPage->fZeroed)
5211 {
5212 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5213 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5214 ASMMemZeroPage(pv);
5215 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5216 }
5217
5218 *ppPage = pPage;
5219 if (fLockPage)
5220 pgmPoolLockPage(pPool, pPage);
5221 pgmUnlock(pVM);
5222 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5223 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5224 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5225 return rc;
5226}
5227
5228
5229/**
5230 * Frees a usage of a pool page.
5231 *
5232 * @param pVM Pointer to the VM.
5233 * @param HCPhys The HC physical address of the shadow page.
5234 * @param iUser The shadow page pool index of the user table.
5235 * NIL_PGMPOOL_IDX if root page.
5236 * @param iUserTable The index into the user table (shadowed). Ignored if
5237 * root page.
5238 */
5239void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5240{
5241 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5242 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5243 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5244}
5245
5246
5247/**
5248 * Internal worker for finding an 'in-use' shadow page given its physical address.
5249 *
5250 * @returns Pointer to the shadow page structure.
5251 * @param pPool The pool.
5252 * @param HCPhys The HC physical address of the shadow page.
5253 */
5254PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5255{
5256 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5257
5258 /*
5259 * Look up the page.
5260 */
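    /* The AVL tree is keyed by the page-aligned host physical address, so mask off the low bits before the lookup. */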
5261 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5262
5263 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5264 return pPage;
5265}
5266
5267
5268/**
5269 * Internal worker for finding a page for debugging purposes, no assertions.
5270 *
5271 * @returns Pointer to the shadow page structure, or NULL if not found.
5272 * @param pPool The pool.
5273 * @param HCPhys The HC physical address of the shadow page.
5274 */
5275PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5276{
5277 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5278 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5279}
5280
5281#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5282
5283/**
5284 * Flushes the specified page if present.
5285 *
5286 * @param pVM Pointer to the VM.
5287 * @param GCPhys Guest physical address of the page to flush
5288 */
5289void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5290{
5291 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5292
5293 VM_ASSERT_EMT(pVM);
5294
5295 /*
5296 * Look up the GCPhys in the hash.
5297 */
5298 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5299 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5300 if (i == NIL_PGMPOOL_IDX)
5301 return;
5302
5303 do
5304 {
5305 PPGMPOOLPAGE pPage = &pPool->aPages[i];
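        /* Unsigned wrap-around check: true exactly when pPage->GCPhys lies within the 4 KB page starting at GCPhys. */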
5306 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5307 {
5308 switch (pPage->enmKind)
5309 {
5310 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5311 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5312 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5313 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5314 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5315 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5316 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5317 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5318 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5319 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5320 case PGMPOOLKIND_64BIT_PML4:
5321 case PGMPOOLKIND_32BIT_PD:
5322 case PGMPOOLKIND_PAE_PDPT:
5323 {
5324 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5325#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5326 if (pPage->fDirty)
5327 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5328 else
5329#endif
5330 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5331 Assert(!pgmPoolIsPageLocked(pPage));
5332 pgmPoolMonitorChainFlush(pPool, pPage);
5333 return;
5334 }
5335
5336 /* ignore, no monitoring. */
5337 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5338 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5339 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5340 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5341 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5342 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5343 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5344 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5345 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5346 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5347 case PGMPOOLKIND_ROOT_NESTED:
5348 case PGMPOOLKIND_PAE_PD_PHYS:
5349 case PGMPOOLKIND_PAE_PDPT_PHYS:
5350 case PGMPOOLKIND_32BIT_PD_PHYS:
5351 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5352 break;
5353
5354 default:
5355 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5356 }
5357 }
5358
5359 /* next */
5360 i = pPage->iNext;
5361 } while (i != NIL_PGMPOOL_IDX);
5362 return;
5363}
5364
5365#endif /* IN_RING3 */
5366#ifdef IN_RING3
5367
5368/**
5369 * Reset CPU on hot plugging.
5370 *
5371 * @param pVM Pointer to the VM.
5372 * @param pVCpu The virtual CPU.
5373 */
5374void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5375{
5376 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5377
5378 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5379 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5380 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5381}
5382
5383
5384/**
5385 * Flushes the entire cache.
5386 *
5387 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5388 * this and will execute the CR3 flush.
5389 *
5390 * @param   pVM     Pointer to the VM.
5391 */
5392void pgmR3PoolReset(PVM pVM)
5393{
5394 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5395
5396 PGM_LOCK_ASSERT_OWNER(pVM);
5397 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5398 LogFlow(("pgmR3PoolReset:\n"));
5399
5400 /*
5401 * If there are no pages in the pool, there is nothing to do.
5402 */
5403 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5404 {
5405 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5406 return;
5407 }
5408
5409 /*
5410 * Exit the shadow mode since we're going to clear everything,
5411 * including the root page.
5412 */
5413 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5414 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5415
5416 /*
5417 * Nuke the free list and reinsert all pages into it.
5418 */
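    /* Indices below PGMPOOL_IDX_FIRST are reserved for the special root pages, so leave those alone. */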
5419 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5420 {
5421 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5422
5423 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5424 if (pPage->fMonitored)
5425 pgmPoolMonitorFlush(pPool, pPage);
5426 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5427 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5428 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5429 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5430 pPage->cModifications = 0;
5431 pPage->GCPhys = NIL_RTGCPHYS;
5432 pPage->enmKind = PGMPOOLKIND_FREE;
5433 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5434 Assert(pPage->idx == i);
5435 pPage->iNext = i + 1;
5436 pPage->fA20Enabled = true;
5437 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5438 pPage->fSeenNonGlobal = false;
5439 pPage->fMonitored = false;
5440 pPage->fDirty = false;
5441 pPage->fCached = false;
5442 pPage->fReusedFlushPending = false;
5443 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5444 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5445 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5446 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5447 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5448 pPage->cLastAccessHandler = 0;
5449 pPage->cLocked = 0;
5450#ifdef VBOX_STRICT
5451 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5452#endif
5453 }
5454 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5455 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5456 pPool->cUsedPages = 0;
5457
5458 /*
5459 * Zap and reinitialize the user records.
5460 */
5461 pPool->cPresent = 0;
5462 pPool->iUserFreeHead = 0;
5463 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5464 const unsigned cMaxUsers = pPool->cMaxUsers;
5465 for (unsigned i = 0; i < cMaxUsers; i++)
5466 {
5467 paUsers[i].iNext = i + 1;
5468 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5469 paUsers[i].iUserTable = 0xfffffffe;
5470 }
5471 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5472
5473 /*
5474 * Clear all the GCPhys links and rebuild the phys ext free list.
5475 */
5476 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5477 pRam;
5478 pRam = pRam->CTX_SUFF(pNext))
5479 {
5480 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5481 while (iPage-- > 0)
5482 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5483 }
5484
5485 pPool->iPhysExtFreeHead = 0;
5486 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5487 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
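    /* Chain all phys ext records into the free list; each record can track up to three cross references (aidx/apte pairs). */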
5488 for (unsigned i = 0; i < cMaxPhysExts; i++)
5489 {
5490 paPhysExts[i].iNext = i + 1;
5491 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5492 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5493 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5494 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5495 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5496 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5497 }
5498 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5499
5500 /*
5501 * Just zap the modified list.
5502 */
5503 pPool->cModifiedPages = 0;
5504 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5505
5506 /*
5507 * Clear the GCPhys hash and the age list.
5508 */
5509 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5510 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5511 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5512 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5513
5514#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5515 /* Clear all dirty pages. */
5516 pPool->idxFreeDirtyPage = 0;
5517 pPool->cDirtyPages = 0;
5518 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5519 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5520#endif
5521
5522 /*
5523 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5524 */
5525 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5526 {
5527 /*
5528 * Re-enter the shadowing mode and assert Sync CR3 FF.
5529 */
5530 PVMCPU pVCpu = &pVM->aCpus[i];
5531 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5532 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5533 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5534 }
5535
5536 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5537}
5538
5539#endif /* IN_RING3 */
5540
5541#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5542/**
5543 * Stringifies a PGMPOOLKIND value.
5544 */
5545static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5546{
5547 switch ((PGMPOOLKIND)enmKind)
5548 {
5549 case PGMPOOLKIND_INVALID:
5550 return "PGMPOOLKIND_INVALID";
5551 case PGMPOOLKIND_FREE:
5552 return "PGMPOOLKIND_FREE";
5553 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5554 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5555 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5556 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5557 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5558 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5559 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5560 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5561 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5562 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5563 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5564 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5565 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5566 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5567 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5568 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5569 case PGMPOOLKIND_32BIT_PD:
5570 return "PGMPOOLKIND_32BIT_PD";
5571 case PGMPOOLKIND_32BIT_PD_PHYS:
5572 return "PGMPOOLKIND_32BIT_PD_PHYS";
5573 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5574 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5575 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5576 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5577 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5578 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5579 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5580 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5581 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5582 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5583 case PGMPOOLKIND_PAE_PD_PHYS:
5584 return "PGMPOOLKIND_PAE_PD_PHYS";
5585 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5586 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5587 case PGMPOOLKIND_PAE_PDPT:
5588 return "PGMPOOLKIND_PAE_PDPT";
5589 case PGMPOOLKIND_PAE_PDPT_PHYS:
5590 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5591 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5592 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5593 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5594 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5595 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5596 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5597 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5598 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5599 case PGMPOOLKIND_64BIT_PML4:
5600 return "PGMPOOLKIND_64BIT_PML4";
5601 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5602 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5603 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5604 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5605 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5606 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5607 case PGMPOOLKIND_ROOT_NESTED:
5608 return "PGMPOOLKIND_ROOT_NESTED";
5609 }
5610 return "Unknown kind!";
5611}
5612#endif /* LOG_ENABLED || VBOX_STRICT */
5613