VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@65471

Last change on this file since 65471 was 65471, checked in by vboxsync, 8 years ago

pgmR3PoolReset: A few missing members (harmless).

1/* $Id: PGMAllPool.cpp 65471 2017-01-26 21:44:44Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hm_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*********************************************************************************************************************************
44* Internal Functions *
45*********************************************************************************************************************************/
46RT_C_DECLS_BEGIN
47#if 0 /* unused */
48DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
49DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
50#endif /* unused */
51static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
54static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
55#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
56static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
57#endif
58#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
59static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
60#endif
61
62int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
63PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
64void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
65void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
66
67RT_C_DECLS_END
68
69
70#if 0 /* unused */
71/**
72 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
73 *
74 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
75 * @param enmKind The page kind.
76 */
77DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
78{
79 switch (enmKind)
80 {
81 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
83 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
84 return true;
85 default:
86 return false;
87 }
88}
89#endif /* unused */
90
91
92/**
93 * Flushes a chain of pages sharing the same access monitor.
94 *
95 * @returns VBox status code suitable for scheduling.
96 * @param pPool The pool.
97 * @param pPage A page in the chain.
98 * @todo VBOXSTRICTRC
99 */
100int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
101{
102 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
103
104 /*
105 * Find the list head.
106 */
107 uint16_t idx = pPage->idx;
108 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
109 {
110 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
111 {
112 idx = pPage->iMonitoredPrev;
113 Assert(idx != pPage->idx);
114 pPage = &pPool->aPages[idx];
115 }
116 }
117
118 /*
119 * Iterate the list flushing each shadow page.
120 */
121 int rc = VINF_SUCCESS;
122 for (;;)
123 {
124 idx = pPage->iMonitoredNext;
125 Assert(idx != pPage->idx);
126 if (pPage->idx >= PGMPOOL_IDX_FIRST)
127 {
128 int rc2 = pgmPoolFlushPage(pPool, pPage);
129 AssertRC(rc2);
130 }
131 /* next */
132 if (idx == NIL_PGMPOOL_IDX)
133 break;
134 pPage = &pPool->aPages[idx];
135 }
136 return rc;
137}
138
139
140/**
141 * Wrapper for getting the current context pointer to the entry being modified.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pVM The cross context VM structure.
145 * @param pvDst Destination address
146 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
147 * on the context (e.g. \#PF in R0 & RC).
148 * @param GCPhysSrc The source guest physical address.
149 * @param cb Size of data to read
150 */
151DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
152{
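    /* Note: both branches below align the source down to an entry-sized boundary
       (callers pass cb = 4 or 8, i.e. a power of two), so the read never straddles
       the guest entry being modified. */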
153#if defined(IN_RING3)
154 NOREF(pVM); NOREF(GCPhysSrc);
155 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
156 return VINF_SUCCESS;
157#else
158 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
159 NOREF(pvSrc);
160 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
161#endif
162}
163
164
165/**
166 * Process shadow entries before they are changed by the guest.
167 *
168 * For PT entries we will clear them. For PD entries, we'll simply check
169 * for mapping conflicts and set the SyncCR3 FF if found.
170 *
171 * @param pVCpu The cross context virtual CPU structure.
172 * @param pPool The pool.
173 * @param pPage The head page.
174 * @param GCPhysFault The guest physical fault address.
175 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
176 * depending on the context (e.g. \#PF in R0 & RC).
177 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
178 */
179static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
180 void const *pvAddress, unsigned cbWrite)
181{
182 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
183 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
184 PVM pVM = pPool->CTX_SUFF(pVM);
185 NOREF(pVCpu);
186
187 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
188 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
189
190 for (;;)
191 {
192 union
193 {
194 void *pv;
195 PX86PT pPT;
196 PPGMSHWPTPAE pPTPae;
197 PX86PD pPD;
198 PX86PDPAE pPDPae;
199 PX86PDPT pPDPT;
200 PX86PML4 pPML4;
201 } uShw;
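        /* The union above lets the mapped shadow page be interpreted according to
           pPage->enmKind in the switch below; only one member is valid at a time. */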
202
203 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
204 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
205
206 uShw.pv = NULL;
207 switch (pPage->enmKind)
208 {
209 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
210 {
211 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
212 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
213 const unsigned iShw = off / sizeof(X86PTE);
214 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
215 if (uShw.pPT->a[iShw].n.u1Present)
216 {
217 X86PTE GstPte;
218
219 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
220 AssertRC(rc);
221 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
222 pgmPoolTracDerefGCPhysHint(pPool, pPage,
223 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
224 GstPte.u & X86_PTE_PG_MASK,
225 iShw);
226 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
227 }
228 break;
229 }
230
231 /* page/2 sized */
232 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
233 {
234 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
235 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
236 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
237 {
238 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
239 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
240 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
241 {
242 X86PTE GstPte;
243 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
244 AssertRC(rc);
245
246 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
247 pgmPoolTracDerefGCPhysHint(pPool, pPage,
248 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
249 GstPte.u & X86_PTE_PG_MASK,
250 iShw);
251 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
252 }
253 }
254 break;
255 }
256
257 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
258 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
259 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
260 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
261 {
262 unsigned iGst = off / sizeof(X86PDE);
263 unsigned iShwPdpt = iGst / 256;
264 unsigned iShw = (iGst % 256) * 2;
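            /* Each 32-bit guest PDE covers 4MB and is shadowed by two PAE PDEs (2MB each),
               with the 1024-entry guest PD split over four 512-entry PAE PDs (PD0..PD3).
               E.g. a write to guest PDE 300 (off = 1200) gives iShwPdpt = 1 and iShw = 88,
               i.e. entries 88/89 of the PAE_PD1_FOR_32BIT_PD shadow page. */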
265 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
266
267 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
268 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
269 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
270 {
271 for (unsigned i = 0; i < 2; i++)
272 {
273# ifdef VBOX_WITH_RAW_MODE_NOT_R0
274 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
275 {
276 Assert(pgmMapAreMappingsEnabled(pVM));
277 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
278 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
279 break;
280 }
281# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
282 if (uShw.pPDPae->a[iShw+i].n.u1Present)
283 {
284 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
285 pgmPoolFree(pVM,
286 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
287 pPage->idx,
288 iShw + i);
289 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
290 }
291
292 /* paranoia / a bit assumptive. */
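                    /* I.e. if the write is misaligned and spills past this 4-byte guest
                       entry, the next guest PDE (two PAE entries further on) may have been
                       touched as well, so give it the same treatment. */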
293 if ( (off & 3)
294 && (off & 3) + cbWrite > 4)
295 {
296 const unsigned iShw2 = iShw + 2 + i;
297 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
298 {
299# ifdef VBOX_WITH_RAW_MODE_NOT_R0
300 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
301 {
302 Assert(pgmMapAreMappingsEnabled(pVM));
303 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
304 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
305 break;
306 }
307# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
308 if (uShw.pPDPae->a[iShw2].n.u1Present)
309 {
310 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
311 pgmPoolFree(pVM,
312 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
313 pPage->idx,
314 iShw2);
315 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
316 }
317 }
318 }
319 }
320 }
321 break;
322 }
323
324 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
325 {
326 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
327 const unsigned iShw = off / sizeof(X86PTEPAE);
328 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
329 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
330 {
331 X86PTEPAE GstPte;
332 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
333 AssertRC(rc);
334
335 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
336 pgmPoolTracDerefGCPhysHint(pPool, pPage,
337 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
338 GstPte.u & X86_PTE_PAE_PG_MASK,
339 iShw);
340 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
341 }
342
343 /* paranoia / a bit assumptive. */
344 if ( (off & 7)
345 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
346 {
347 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
348 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
349
350 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
351 {
352 X86PTEPAE GstPte;
353 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
354 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
355 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
356 AssertRC(rc);
357 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
358 pgmPoolTracDerefGCPhysHint(pPool, pPage,
359 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
360 GstPte.u & X86_PTE_PAE_PG_MASK,
361 iShw2);
362 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
363 }
364 }
365 break;
366 }
367
368 case PGMPOOLKIND_32BIT_PD:
369 {
370 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
371 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
372
373 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
374 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
375# ifdef VBOX_WITH_RAW_MODE_NOT_R0
376 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
377 {
378 Assert(pgmMapAreMappingsEnabled(pVM));
379 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
380 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
381 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
382 break;
383 }
384 else
385# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
386 {
387 if (uShw.pPD->a[iShw].n.u1Present)
388 {
389 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
390 pgmPoolFree(pVM,
391 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
392 pPage->idx,
393 iShw);
394 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
395 }
396 }
397 /* paranoia / a bit assumptive. */
398 if ( (off & 3)
399 && (off & 3) + cbWrite > sizeof(X86PTE))
400 {
401 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
402 if ( iShw2 != iShw
403 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
404 {
405# ifdef VBOX_WITH_RAW_MODE_NOT_R0
406 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
407 {
408 Assert(pgmMapAreMappingsEnabled(pVM));
409 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
410 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
411 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
412 break;
413 }
414# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
415 if (uShw.pPD->a[iShw2].n.u1Present)
416 {
417 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
418 pgmPoolFree(pVM,
419 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
420 pPage->idx,
421 iShw2);
422 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
423 }
424 }
425 }
426#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
427 if ( uShw.pPD->a[iShw].n.u1Present
428 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
429 {
430 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
431# ifdef IN_RC /* TLB load - we're pushing things a bit... */
432 ASMProbeReadByte(pvAddress);
433# endif
434 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
435 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
436 }
437#endif
438 break;
439 }
440
441 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
442 {
443 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
444 const unsigned iShw = off / sizeof(X86PDEPAE);
445 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
446#ifdef VBOX_WITH_RAW_MODE_NOT_R0
447 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
448 {
449 Assert(pgmMapAreMappingsEnabled(pVM));
450 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
451 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
452 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
453 break;
454 }
455#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
456 /*
457 * Causes trouble when the guest uses a PDE to refer to the whole page table level
458 * structure. (Invalidate here; faults later on when it tries to change the page
459 * table entries -> recheck; probably only applies to the RC case.)
460 */
461#ifdef VBOX_WITH_RAW_MODE_NOT_R0
462 else
463#endif
464 {
465 if (uShw.pPDPae->a[iShw].n.u1Present)
466 {
467 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
468 pgmPoolFree(pVM,
469 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
470 pPage->idx,
471 iShw);
472 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
473 }
474 }
475 /* paranoia / a bit assumptive. */
476 if ( (off & 7)
477 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
478 {
479 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
480 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
481
482#ifdef VBOX_WITH_RAW_MODE_NOT_R0
483 if ( iShw2 != iShw
484 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
485 {
486 Assert(pgmMapAreMappingsEnabled(pVM));
487 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
488 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
489 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
490 break;
491 }
492 else
493#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
494 if (uShw.pPDPae->a[iShw2].n.u1Present)
495 {
496 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
497 pgmPoolFree(pVM,
498 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
499 pPage->idx,
500 iShw2);
501 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
502 }
503 }
504 break;
505 }
506
507 case PGMPOOLKIND_PAE_PDPT:
508 {
509 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
510 /*
511 * Hopefully this doesn't happen very often:
512 * - touching unused parts of the page
513 * - messing with the bits of pd pointers without changing the physical address
514 */
515 /* PDPT roots are not page aligned; 32 byte only! */
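            /* Hence the offset is computed against pPage->GCPhys (the 32-byte aligned
               guest PDPT address) rather than against the page offset used above. */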
516 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
517
518 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
519 const unsigned iShw = offPdpt / sizeof(X86PDPE);
520 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
521 {
522# ifdef VBOX_WITH_RAW_MODE_NOT_R0
523 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
524 {
525 Assert(pgmMapAreMappingsEnabled(pVM));
526 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
527 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
528 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
529 break;
530 }
531 else
532# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
533 if (uShw.pPDPT->a[iShw].n.u1Present)
534 {
535 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
536 pgmPoolFree(pVM,
537 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
538 pPage->idx,
539 iShw);
540 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
541 }
542
543 /* paranoia / a bit assumptive. */
544 if ( (offPdpt & 7)
545 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
546 {
547 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
548 if ( iShw2 != iShw
549 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
550 {
551# ifdef VBOX_WITH_RAW_MODE_NOT_R0
552 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
553 {
554 Assert(pgmMapAreMappingsEnabled(pVM));
555 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
556 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
557 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
558 break;
559 }
560 else
561# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
562 if (uShw.pPDPT->a[iShw2].n.u1Present)
563 {
564 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
565 pgmPoolFree(pVM,
566 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
567 pPage->idx,
568 iShw2);
569 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
570 }
571 }
572 }
573 }
574 break;
575 }
576
577#ifndef IN_RC
578 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
579 {
580 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
581 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
582 const unsigned iShw = off / sizeof(X86PDEPAE);
583 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
584 if (uShw.pPDPae->a[iShw].n.u1Present)
585 {
586 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
587 pgmPoolFree(pVM,
588 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
589 pPage->idx,
590 iShw);
591 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
592 }
593 /* paranoia / a bit assumptive. */
594 if ( (off & 7)
595 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
596 {
597 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
598 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
599
600 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
601 if (uShw.pPDPae->a[iShw2].n.u1Present)
602 {
603 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
604 pgmPoolFree(pVM,
605 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
606 pPage->idx,
607 iShw2);
608 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
609 }
610 }
611 break;
612 }
613
614 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
615 {
616 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
617 /*
618 * Hopefully this doesn't happen very often:
619 * - messing with the bits of pd pointers without changing the physical address
620 */
621 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
622 const unsigned iShw = off / sizeof(X86PDPE);
623 if (uShw.pPDPT->a[iShw].n.u1Present)
624 {
625 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
626 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
627 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
628 }
629 /* paranoia / a bit assumptive. */
630 if ( (off & 7)
631 && (off & 7) + cbWrite > sizeof(X86PDPE))
632 {
633 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
634 if (uShw.pPDPT->a[iShw2].n.u1Present)
635 {
636 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
637 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
638 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
639 }
640 }
641 break;
642 }
643
644 case PGMPOOLKIND_64BIT_PML4:
645 {
646 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
647 /*
648 * Hopefully this doesn't happen very often:
649 * - messing with the bits of pd pointers without changing the physical address
650 */
651 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
652 const unsigned iShw = off / sizeof(X86PDPE);
653 if (uShw.pPML4->a[iShw].n.u1Present)
654 {
655 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
656 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
657 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
658 }
659 /* paranoia / a bit assumptive. */
660 if ( (off & 7)
661 && (off & 7) + cbWrite > sizeof(X86PDPE))
662 {
663 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
664 if (uShw.pPML4->a[iShw2].n.u1Present)
665 {
666 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
667 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
668 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
669 }
670 }
671 break;
672 }
673#endif /* !IN_RC */
674
675 default:
676 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
677 }
678 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
679
680 /* next */
681 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
682 return;
683 pPage = &pPool->aPages[pPage->iMonitoredNext];
684 }
685}
686
687#ifndef IN_RING3
688
689/**
690 * Checks if an access could be a fork operation in progress.
691 *
692 * Meaning that the guest is setting up the parent process for Copy-On-Write.
693 *
694 * @returns true if it's likely that we're forking, otherwise false.
695 * @param pPool The pool.
696 * @param pDis The disassembled instruction.
697 * @param offFault The access offset.
698 */
699DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
700{
701 /*
702 * i386 linux is using btr to clear X86_PTE_RW.
703 * The functions involved are (2.6.16 source inspection):
704 * clear_bit
705 * ptep_set_wrprotect
706 * copy_one_pte
707 * copy_pte_range
708 * copy_pmd_range
709 * copy_pud_range
710 * copy_page_range
711 * dup_mmap
712 * dup_mm
713 * copy_mm
714 * copy_process
715 * do_fork
716 */
717 if ( pDis->pCurInstr->uOpcode == OP_BTR
718 && !(offFault & 4)
719 /** @todo Validate that the bit index is X86_PTE_RW. */
720 )
721 {
722 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
723 return true;
724 }
725 return false;
726}
727
728
729/**
730 * Determine whether the page is likely to have been reused.
731 *
732 * @returns true if we consider the page as being reused for a different purpose.
733 * @returns false if we consider it to still be a paging page.
734 * @param pVM The cross context VM structure.
735 * @param pVCpu The cross context virtual CPU structure.
736 * @param pRegFrame Trap register frame.
737 * @param pDis The disassembly info for the faulting instruction.
738 * @param pvFault The fault address.
739 *
740 * @remark The REP prefix check is left to the caller because of STOSD/W.
741 */
742DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
743{
744# ifndef IN_RC
745 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
746 if ( HMHasPendingIrq(pVM)
747 && (pRegFrame->rsp - pvFault) < 32)
748 {
749 /* Fault caused by stack writes while trying to inject an interrupt event. */
750 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
751 return true;
752 }
753# else
754 NOREF(pVM); NOREF(pvFault);
755# endif
756
757 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
758
759 /* Non-supervisor mode write means it's used for something else. */
760 if (CPUMGetGuestCPL(pVCpu) == 3)
761 return true;
762
763 switch (pDis->pCurInstr->uOpcode)
764 {
765 /* call implies the actual push of the return address faulted */
766 case OP_CALL:
767 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
768 return true;
769 case OP_PUSH:
770 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
771 return true;
772 case OP_PUSHF:
773 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
774 return true;
775 case OP_PUSHA:
776 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
777 return true;
778 case OP_FXSAVE:
779 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
780 return true;
781 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
782 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
783 return true;
784 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
785 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
786 return true;
787 case OP_MOVSWD:
788 case OP_STOSWD:
789 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
790 && pRegFrame->rcx >= 0x40
791 )
792 {
793 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
794
795 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
796 return true;
797 }
798 break;
799
800 default:
801 /*
802 * Anything having ESP on the left side means stack writes.
803 */
804 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
805 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
806 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
807 {
808 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
809 return true;
810 }
811 break;
812 }
813
814 /*
815 * Page table updates are very, very unlikely to cross page boundaries,
816 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
817 */
818 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
819 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
820 {
821 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
822 return true;
823 }
824
825 /*
826 * Nobody does an unaligned 8 byte write to a page table, right?
827 */
828 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
829 {
830 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
831 return true;
832 }
833
834 return false;
835}
836
837
838/**
839 * Flushes the page being accessed.
840 *
841 * @returns VBox status code suitable for scheduling.
842 * @param pVM The cross context VM structure.
843 * @param pVCpu The cross context virtual CPU structure.
844 * @param pPool The pool.
845 * @param pPage The pool page (head).
846 * @param pDis The disassembly of the write instruction.
847 * @param pRegFrame The trap register frame.
848 * @param GCPhysFault The fault address as guest physical address.
849 * @param pvFault The fault address.
850 * @todo VBOXSTRICTRC
851 */
852static int pgmRZPoolAccessPfHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
853 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
854{
855 NOREF(pVM); NOREF(GCPhysFault);
856
857 /*
858 * First, do the flushing.
859 */
860 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
861
862 /*
863 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
864 * Must do this in raw mode (!); XP boot will fail otherwise.
865 */
866 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
867 if (rc2 == VINF_SUCCESS)
868 { /* do nothing */ }
869 else if (rc2 == VINF_EM_RESCHEDULE)
870 {
871 if (rc == VINF_SUCCESS)
872 rc = VBOXSTRICTRC_VAL(rc2);
873# ifndef IN_RING3
874 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
875# endif
876 }
877 else if (rc2 == VERR_EM_INTERPRETER)
878 {
879# ifdef IN_RC
880 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
881 {
882 LogFlow(("pgmRZPoolAccessPfHandlerFlush: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
883 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->eip));
884 rc = VINF_SUCCESS;
885 STAM_COUNTER_INC(&pPool->StatMonitorPfRZIntrFailPatch2);
886 }
887 else
888# endif
889 {
890 rc = VINF_EM_RAW_EMULATE_INSTR;
891 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
892 }
893 }
894 else if (RT_FAILURE_NP(rc2))
895 rc = VBOXSTRICTRC_VAL(rc2);
896 else
897 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
898
899 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
900 return rc;
901}
902
903
904/**
905 * Handles the STOSD write accesses.
906 *
907 * @returns VBox status code suitable for scheduling.
908 * @param pVM The cross context VM structure.
909 * @param pPool The pool.
910 * @param pPage The pool page (head).
911 * @param pDis The disassembly of the write instruction.
912 * @param pRegFrame The trap register frame.
913 * @param GCPhysFault The fault address as guest physical address.
914 * @param pvFault The fault address.
915 */
916DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
917 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
918{
919 unsigned uIncrement = pDis->Param1.cb;
920 NOREF(pVM);
921
922 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
923 Assert(pRegFrame->rcx <= 0x20);
924
925# ifdef VBOX_STRICT
926 if (pDis->uOpMode == DISCPUMODE_32BIT)
927 Assert(uIncrement == 4);
928 else
929 Assert(uIncrement == 8);
930# endif
931
932 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
933
934 /*
935 * Increment the modification counter and insert it into the list
936 * of modified pages the first time.
937 */
938 if (!pPage->cModifications++)
939 pgmPoolMonitorModifiedInsert(pPool, pPage);
940
941 /*
942 * Execute REP STOSD.
943 *
944 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
945 * write situation, meaning that it's safe to write here.
946 */
947 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
948 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
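    /* Each iteration first lets the monitor chain react to the upcoming write
       (pgmPoolMonitorChainChanging) and then performs the guest write itself:
       directly through the mapping in RC, via PGMPhysSimpleWriteGCPhys in R0,
       advancing rdi and decrementing rcx just like the CPU would. */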
949 while (pRegFrame->rcx)
950 {
951# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
952 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
953 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
954 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
955# else
956 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
957# endif
958# ifdef IN_RC
959 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
960# else
961 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
962# endif
963 pu32 += uIncrement;
964 GCPhysFault += uIncrement;
965 pRegFrame->rdi += uIncrement;
966 pRegFrame->rcx--;
967 }
968 pRegFrame->rip += pDis->cbInstr;
969
970 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
971 return VINF_SUCCESS;
972}
973
974
975/**
976 * Handles the simple write accesses.
977 *
978 * @returns VBox status code suitable for scheduling.
979 * @param pVM The cross context VM structure.
980 * @param pVCpu The cross context virtual CPU structure.
981 * @param pPool The pool.
982 * @param pPage The pool page (head).
983 * @param pDis The disassembly of the write instruction.
984 * @param pRegFrame The trap register frame.
985 * @param GCPhysFault The fault address as guest physical address.
986 * @param pvFault The fault address.
987 * @param pfReused Reused state (in/out)
988 */
989DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
990 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
991{
992 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
993 NOREF(pVM);
994 NOREF(pfReused); /* initialized by caller */
995
996 /*
997 * Increment the modification counter and insert it into the list
998 * of modified pages the first time.
999 */
1000 if (!pPage->cModifications++)
1001 pgmPoolMonitorModifiedInsert(pPool, pPage);
1002
1003 /*
1004 * Clear all the pages. ASSUMES that pvFault is readable.
1005 */
1006# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1007 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1008# endif
1009
1010 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
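    /* Feed the write to the monitor chain in chunks of at most 8 bytes, since the
       per-entry handling in pgmPoolMonitorChainChanging only deals with the entry
       hit and, at most, the one following it. */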
1011 if (cbWrite <= 8)
1012 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1013 else if (cbWrite <= 16)
1014 {
1015 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1016 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1017 }
1018 else
1019 {
1020 Assert(cbWrite <= 32);
1021 for (uint32_t off = 0; off < cbWrite; off += 8)
1022 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1023 }
1024
1025# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1026 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1027# endif
1028
1029 /*
1030 * Interpret the instruction.
1031 */
1032 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
1033 if (RT_SUCCESS(rc))
1034 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1035 else if (rc == VERR_EM_INTERPRETER)
1036 {
1037 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
1038 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1039 rc = VINF_EM_RAW_EMULATE_INSTR;
1040 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
1041 }
1042
1043# if 0 /* experimental code */
1044 if (rc == VINF_SUCCESS)
1045 {
1046 switch (pPage->enmKind)
1047 {
1048 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1049 {
1050 X86PTEPAE GstPte;
1051 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1052 AssertRC(rc);
1053
1054 /* Check the new value written by the guest. If present and with a bogus physical address, then
1055 * it's fairly safe to assume the guest is reusing the PT.
1056 */
1057 if (GstPte.n.u1Present)
1058 {
1059 RTHCPHYS HCPhys = -1;
1060 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1061 if (rc != VINF_SUCCESS)
1062 {
1063 *pfReused = true;
1064 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1065 }
1066 }
1067 break;
1068 }
1069 }
1070 }
1071# endif
1072
1073 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1074 return VBOXSTRICTRC_VAL(rc);
1075}
1076
1077
1078/**
1079 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1080 * \#PF access handler callback for page table pages.}
1081 *
1082 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
1083 */
1084DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVM pVM, PVMCPU pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
1085 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1086{
1087 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
1088 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1089 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1090 unsigned cMaxModifications;
1091 bool fForcedFlush = false;
1092 NOREF(uErrorCode);
1093
1094 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1095
1096 pgmLock(pVM);
1097 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1098 {
1099 /* Pool page changed while we were waiting for the lock; ignore. */
1100 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1101 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1102 pgmUnlock(pVM);
1103 return VINF_SUCCESS;
1104 }
1105# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1106 if (pPage->fDirty)
1107 {
1108 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1109 pgmUnlock(pVM);
1110 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1111 }
1112# endif
1113
1114# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1115 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1116 {
1117 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1118 void *pvGst;
1119 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1120 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1121 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1122 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1123 }
1124# endif
1125
1126 /*
1127 * Disassemble the faulting instruction.
1128 */
1129 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1130 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1131 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1132 {
1133 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1134 pgmUnlock(pVM);
1135 return rc;
1136 }
1137
1138 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1139
1140 /*
1141 * We should ALWAYS have the list head as user parameter. This
1142 * is because we use that page to record the changes.
1143 */
1144 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1145
1146# ifdef IN_RING0
1147 /* Maximum nr of modifications depends on the page type. */
1148 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1149 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1150 cMaxModifications = 4;
1151 else
1152 cMaxModifications = 24;
1153# else
1154 cMaxModifications = 48;
1155# endif
1156
1157 /*
1158 * Incremental page table updates should weigh more than random ones.
1159 * (Only applies when started from offset 0)
1160 */
1161 pVCpu->pgm.s.cPoolAccessHandler++;
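    /* Heuristic: if this fault hits the entry right after the previously faulting
       one, from (nearly) the same RIP, and is the very next pool access, the guest
       is most likely sweeping through the page table; double the modification count
       so the flush/reinit threshold below is reached sooner. */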
1162 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1163 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1164 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1165 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1166 {
1167 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1168 Assert(pPage->cModifications < 32000);
1169 pPage->cModifications = pPage->cModifications * 2;
1170 pPage->GCPtrLastAccessHandlerFault = pvFault;
1171 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1172 if (pPage->cModifications >= cMaxModifications)
1173 {
1174 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1175 fForcedFlush = true;
1176 }
1177 }
1178
1179 if (pPage->cModifications >= cMaxModifications)
1180 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1181
1182 /*
1183 * Check if it's worth dealing with.
1184 */
1185 bool fReused = false;
1186 bool fNotReusedNotForking = false;
1187 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1188 || pgmPoolIsPageLocked(pPage)
1189 )
1190 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1191 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1192 {
1193 /*
1194 * Simple instructions, no REP prefix.
1195 */
1196 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1197 {
1198 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1199 if (fReused)
1200 goto flushPage;
1201
1202 /* A mov instruction to change the first page table entry will be remembered so we can detect
1203 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1204 */
1205 if ( rc == VINF_SUCCESS
1206 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1207 && pDis->pCurInstr->uOpcode == OP_MOV
1208 && (pvFault & PAGE_OFFSET_MASK) == 0)
1209 {
1210 pPage->GCPtrLastAccessHandlerFault = pvFault;
1211 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1212 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1213 /* Make sure we don't kick out a page too quickly. */
1214 if (pPage->cModifications > 8)
1215 pPage->cModifications = 2;
1216 }
1217 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1218 {
1219 /* ignore the 2nd write to this page table entry. */
1220 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1221 }
1222 else
1223 {
1224 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1225 pPage->GCPtrLastAccessHandlerRip = 0;
1226 }
1227
1228 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1229 pgmUnlock(pVM);
1230 return rc;
1231 }
1232
1233 /*
1234 * Windows is frequently doing small memset() operations (netio test 4k+).
1235 * We have to deal with these or we'll kill the cache and performance.
1236 */
1237 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1238 && !pRegFrame->eflags.Bits.u1DF
1239 && pDis->uOpMode == pDis->uCpuMode
1240 && pDis->uAddrMode == pDis->uCpuMode)
1241 {
1242 bool fValidStosd = false;
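            /* The REP STOSD/STOSQ fast path is only taken for a plain forward fill
               that stays within this page, has a naturally aligned destination and
               stores one of the two values observed in practice (0 or 0x80); anything
               else is left to the generic handling further down. */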
1243
1244 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1245 && pDis->fPrefix == DISPREFIX_REP
1246 && pRegFrame->ecx <= 0x20
1247 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1248 && !((uintptr_t)pvFault & 3)
1249 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1250 )
1251 {
1252 fValidStosd = true;
1253 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1254 }
1255 else
1256 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1257 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1258 && pRegFrame->rcx <= 0x20
1259 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1260 && !((uintptr_t)pvFault & 7)
1261 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1262 )
1263 {
1264 fValidStosd = true;
1265 }
1266
1267 if (fValidStosd)
1268 {
1269 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1270 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1271 pgmUnlock(pVM);
1272 return rc;
1273 }
1274 }
1275
1276 /* REP prefix, don't bother. */
1277 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1278 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1279 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1280 fNotReusedNotForking = true;
1281 }
1282
1283# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1284 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1285 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1286 */
1287 if ( pPage->cModifications >= cMaxModifications
1288 && !fForcedFlush
1289 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1290 && ( fNotReusedNotForking
1291 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1292 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1293 )
1294 )
1295 {
1296 Assert(!pgmPoolIsPageLocked(pPage));
1297 Assert(pPage->fDirty == false);
1298
1299 /* Flush any monitored duplicates as we will disable write protection. */
1300 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1301 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1302 {
1303 PPGMPOOLPAGE pPageHead = pPage;
1304
1305 /* Find the monitor head. */
1306 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1307 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1308
1309 while (pPageHead)
1310 {
1311 unsigned idxNext = pPageHead->iMonitoredNext;
1312
1313 if (pPageHead != pPage)
1314 {
1315 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1316 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1317 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1318 AssertRC(rc2);
1319 }
1320
1321 if (idxNext == NIL_PGMPOOL_IDX)
1322 break;
1323
1324 pPageHead = &pPool->aPages[idxNext];
1325 }
1326 }
1327
1328 /* The flushing above might fail for locked pages, so double check. */
1329 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1330 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1331 {
1332 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1333
1334 /* Temporarily allow write access to the page table again. */
1335 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1336 if (rc == VINF_SUCCESS)
1337 {
1338 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1339 AssertMsg(rc == VINF_SUCCESS
1340 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1341 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1342 || rc == VERR_PAGE_NOT_PRESENT,
1343 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1344# ifdef VBOX_STRICT
1345 pPage->GCPtrDirtyFault = pvFault;
1346# endif
1347
1348 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1349 pgmUnlock(pVM);
1350 return rc;
1351 }
1352 }
1353 }
1354# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1355
1356 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1357flushPage:
1358 /*
1359 * Not worth it, so flush it.
1360 *
1361 * If we considered it to be reused, don't go back to ring-3
1362 * to emulate failed instructions since we usually cannot
1363 * interpret them. This may be a bit risky, in which case
1364 * the reuse detection must be fixed.
1365 */
1366 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1367 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1368 && fReused)
1369 {
1370 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1371 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1372 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1373 }
1374 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1375 pgmUnlock(pVM);
1376 return rc;
1377}
1378
1379#endif /* !IN_RING3 */
1380
1381/**
1382 * @callback_method_impl{FNPGMPHYSHANDLER,
1383 * Access handler for shadowed page table pages.}
1384 *
1385 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1386 */
1387PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1388pgmPoolAccessHandler(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1389 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1390{
1391 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1392 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1393 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1394 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1395 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1396
1397 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1398
1399 pgmLock(pVM);
1400
1401#ifdef VBOX_WITH_STATISTICS
1402 /*
1403 * Collect stats on the access.
1404 */
1405 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 17);
1406 if (cbBuf <= 16 && cbBuf > 0)
1407 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1408 else
1409 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1410
1411 uint8_t cbAlign;
1412 switch (pPage->enmKind)
1413 {
1414 default:
1415 cbAlign = 7;
1416 break;
1417 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1418 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1419 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1420 case PGMPOOLKIND_32BIT_PD:
1421 case PGMPOOLKIND_32BIT_PD_PHYS:
1422 cbAlign = 3;
1423 break;
1424 }
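    /* Note: despite the name, cbAlign is used as an alignment mask below: 7 for
       page tables with 8-byte entries, 3 for the 32-bit (4-byte entry) kinds. */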
1425 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1426 if ((uint8_t)GCPhys & cbAlign)
1427 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1428#endif
1429
1430 /*
1431 * Make sure the pool page wasn't modified by a different CPU.
1432 */
1433 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1434 {
1435 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1436
1437 /* The max modification count before flushing depends on the context and page type. */
1438#ifdef IN_RING3
1439 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1440#else
1441 uint16_t cMaxModifications;
1442 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1443 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1444 cMaxModifications = 4;
1445 else
1446 cMaxModifications = 24;
1447# ifdef IN_RC
1448 cMaxModifications *= 2; /* traps are cheaper than exits. */
1449# endif
1450#endif
1451
1452 /*
1453 * We don't have to be very sophisticated about this since there are relatively few calls here.
1454 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1455 */
1456 if ( ( pPage->cModifications < cMaxModifications
1457 || pgmPoolIsPageLocked(pPage) )
1458 && enmOrigin != PGMACCESSORIGIN_DEVICE
1459 && cbBuf <= 16)
1460 {
1461 /* Clear the shadow entry. */
1462 if (!pPage->cModifications++)
1463 pgmPoolMonitorModifiedInsert(pPool, pPage);
1464
1465 if (cbBuf <= 8)
1466 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1467 else
1468 {
1469 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1470 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1471 }
1472 }
1473 else
1474 {
1475 /* ASSUME that VERR_PGM_POOL_CLEARED can be ignored here and that FFs will deal with it in due time. */
1476 pgmPoolMonitorChainFlush(pPool, pPage);
1477 }
1478
1479 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1480 }
1481 else
1482 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1483 pgmUnlock(pVM);
1484 return VINF_PGM_HANDLER_DO_DEFAULT;
1485}
1486
1487
1488# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1489
1490# if defined(VBOX_STRICT) && !defined(IN_RING3)
1491
1492/**
1493 * Check references to guest physical memory in a PAE / PAE page table.
1494 *
1495 * @param pPool The pool.
1496 * @param pPage The page.
1497 * @param pShwPT The shadow page table (mapping of the page).
1498 * @param pGstPT The guest page table.
1499 */
1500static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1501{
1502 unsigned cErrors = 0;
1503 int LastRc = -1; /* initialized to shut up gcc */
1504 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1505 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1506 PVM pVM = pPool->CTX_SUFF(pVM);
1507
1508#ifdef VBOX_STRICT
1509 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1510 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1511#endif
1512 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1513 {
1514 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1515 {
1516 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1517 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1518 if ( rc != VINF_SUCCESS
1519 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1520 {
1521 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1522 LastPTE = i;
1523 LastRc = rc;
1524 LastHCPhys = HCPhys;
1525 cErrors++;
1526
1527 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1528 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1529 AssertRC(rc);
1530
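            /* On a mismatch, scan the whole pool for other shadow PAE PTs that still
               map this guest page table writable; such entries would be one way the
               guest copy could have changed behind the write monitor's back. */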
1531 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1532 {
1533 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1534
1535 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1536 {
1537 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1538
1539 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1540 {
1541 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1542 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1543 {
1544 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1545 }
1546 }
1547
1548 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1549 }
1550 }
1551 }
1552 }
1553 }
1554 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1555}
1556
1557
1558/**
1559 * Check references to guest physical memory in a PAE / 32-bit page table.
1560 *
1561 * @param pPool The pool.
1562 * @param pPage The page.
1563 * @param pShwPT The shadow page table (mapping of the page).
1564 * @param pGstPT The guest page table.
1565 */
1566static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1567{
1568 unsigned cErrors = 0;
1569 int LastRc = -1; /* initialized to shut up gcc */
1570 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1571 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1572 PVM pVM = pPool->CTX_SUFF(pVM);
1573
1574#ifdef VBOX_STRICT
1575 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1576 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1577#endif
1578 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1579 {
1580 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1581 {
1582 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1583 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1584 if ( rc != VINF_SUCCESS
1585 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1586 {
1587 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1588 LastPTE = i;
1589 LastRc = rc;
1590 LastHCPhys = HCPhys;
1591 cErrors++;
1592
1593 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1594 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1595 AssertRC(rc);
1596
1597 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1598 {
1599 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1600
1601 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1602 {
1603 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1604
1605 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1606 {
1607 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1608 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1609 {
1610 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1611 }
1612 }
1613
1614 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1615 }
1616 }
1617 }
1618 }
1619 }
1620 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1621}
1622
1623# endif /* VBOX_STRICT && !IN_RING3 */
1624
1625/**
1626 * Clear references to guest physical memory in a PAE / PAE page table.
1627 *
1628 * @returns nr of changed PTEs
1629 * @param pPool The pool.
1630 * @param pPage The page.
1631 * @param pShwPT The shadow page table (mapping of the page).
1632 * @param pGstPT The guest page table.
1633 * @param pOldGstPT The old cached guest page table.
1634 * @param fAllowRemoval Whether to bail out and request a flush as soon as an invalid guest PTE is encountered.
1635 * @param pfFlush Flush reused page table (out)
1636 */
1637DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1638 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1639{
1640 unsigned cChanged = 0;
1641
1642#ifdef VBOX_STRICT
1643 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1644 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1645#endif
1646 *pfFlush = false;
1647
1648 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1649 {
1650 /* Check the new value written by the guest. If present and with a bogus physical address, then
1651 * it's fairly safe to assume the guest is reusing the PT.
1652 */
1653 if ( fAllowRemoval
1654 && pGstPT->a[i].n.u1Present)
1655 {
1656 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1657 {
1658 *pfFlush = true;
1659 return ++cChanged;
1660 }
1661 }
1662 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1663 {
1664 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1665 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1666 {
1667#ifdef VBOX_STRICT
1668 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1669 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1670 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1671#endif
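                /* The physical address is unchanged; the shadow PTE may only be kept if its
                   attribute bits match the guest's and it is not more permissive for writes
                   (fHostRW <= fGuestRW), i.e. shadow-writable only when the guest entry is
                   writable too. Otherwise the entry is refreshed below. */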
1672 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1673 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1674 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1675 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1676
1677 if ( uHostAttr == uGuestAttr
1678 && fHostRW <= fGuestRW)
1679 continue;
1680 }
1681 cChanged++;
1682 /* Something was changed, so flush it. */
1683 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1684 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1685 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1686 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1687 }
1688 }
1689 return cChanged;
1690}
1691
1692
1693/**
1694 * Clear references to guest physical memory in a PAE / 32-bit page table.
1695 *
1696 * @returns nr of changed PTEs
1697 * @param pPool The pool.
1698 * @param pPage The page.
1699 * @param pShwPT The shadow page table (mapping of the page).
1700 * @param pGstPT The guest page table.
1701 * @param pOldGstPT The old cached guest page table.
1702 * @param fAllowRemoval Whether to bail out and request a flush as soon as an invalid guest PTE is encountered.
1703 * @param pfFlush Flush reused page table (out)
1704 */
1705DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1706 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1707{
1708 unsigned cChanged = 0;
1709
1710#ifdef VBOX_STRICT
1711 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1712 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1713#endif
1714 *pfFlush = false;
1715
1716 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1717 {
1718 /* Check the new value written by the guest. If present and with a bogus physical address, then
1719 * it's fairly safe to assume the guest is reusing the PT.
1720 */
1721 if ( fAllowRemoval
1722 && pGstPT->a[i].n.u1Present)
1723 {
1724 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1725 {
1726 *pfFlush = true;
1727 return ++cChanged;
1728 }
1729 }
1730 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1731 {
1732 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1733 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1734 {
1735#ifdef VBOX_STRICT
1736 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1737 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1738 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1739#endif
1740 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1741 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1742 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1743 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1744
1745 if ( uHostAttr == uGuestAttr
1746 && fHostRW <= fGuestRW)
1747 continue;
1748 }
1749 cChanged++;
1750 /* Something was changed, so flush it. */
1751 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1752 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1753 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1754 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1755 }
1756 }
1757 return cChanged;
1758}
1759
1760
1761/**
1762 * Flush a dirty page
1763 *
1764 * @param pVM The cross context VM structure.
1765 * @param pPool The pool.
1766 * @param idxSlot Dirty array slot index
1767 * @param fAllowRemoval Allow a reused page table to be removed
1768 */
1769static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1770{
1771 PPGMPOOLPAGE pPage;
1772 unsigned idxPage;
1773
1774 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1775 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1776 return;
1777
1778 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1779 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1780 pPage = &pPool->aPages[idxPage];
1781 Assert(pPage->idx == idxPage);
1782 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1783
1784 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1785 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1786
1787#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1788 PVMCPU pVCpu = VMMGetCpu(pVM);
1789 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1790#endif
1791
1792 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1793 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1794 Assert(rc == VINF_SUCCESS);
1795 pPage->fDirty = false;
1796
1797#ifdef VBOX_STRICT
1798 uint64_t fFlags = 0;
1799 RTHCPHYS HCPhys;
1800 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1801 AssertMsg( ( rc == VINF_SUCCESS
1802 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1803 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1804 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1805 || rc == VERR_PAGE_NOT_PRESENT,
1806 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1807#endif
1808
1809 /* Flush those PTEs that have changed. */
1810 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1811 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1812 void *pvGst;
1813 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1814 bool fFlush;
1815 unsigned cChanges;
1816
1817 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1818 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1819 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1820 else
1821 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1822 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1823
1824 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1825 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1826 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1827 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1828
1829 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1830 Assert(pPage->cModifications);
1831 if (cChanges < 4)
1832 pPage->cModifications = 1; /* must use > 0 here */
1833 else
1834 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1835
1836 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
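    /* If the dirty tracking array was completely full there was no free slot; the slot
       we have just emptied becomes the next free one. */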
1837 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1838 pPool->idxFreeDirtyPage = idxSlot;
1839
1840 pPool->cDirtyPages--;
1841 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1842 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1843 if (fFlush)
1844 {
1845 Assert(fAllowRemoval);
1846 Log(("Flush reused page table!\n"));
1847 pgmPoolFlushPage(pPool, pPage);
1848 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1849 }
1850 else
1851 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1852
1853#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1854 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1855#endif
1856}
1857
1858
1859# ifndef IN_RING3
1860/**
1861 * Add a new dirty page
1862 *
1863 * @param pVM The cross context VM structure.
1864 * @param pPool The pool.
1865 * @param pPage The page.
1866 */
1867void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1868{
1869 unsigned idxFree;
1870
1871 PGM_LOCK_ASSERT_OWNER(pVM);
1872 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1873 Assert(!pPage->fDirty);
1874
1875 idxFree = pPool->idxFreeDirtyPage;
1876 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1877 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1878
1879 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1880 {
1881 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1882 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1883 }
1884 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1885 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1886
1887 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1888
1889 /*
1890 * Make a copy of the guest page table as we require valid GCPhys addresses
1891 * when removing references to physical pages.
1892 * (The HCPhys linear lookup is *extremely* expensive!)
1893 */
1894 void *pvGst;
1895 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
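    /* A PAE guest PT is a full page of 8-byte entries. A PAE shadow of a 32-bit guest PT
       only covers 512 of the guest's 1024 4-byte entries (2 MB of address space), so half
       a page of guest data is enough for the snapshot. */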
1896 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1897# ifdef VBOX_STRICT
1898 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1899 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1900 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1901 else
1902 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1903 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1904# endif
1905 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1906
1907 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1908 pPage->fDirty = true;
1909 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1910 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1911 pPool->cDirtyPages++;
1912
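    /* Advance the free index; the array size is a power of two (8 or 16, see the
       AssertCompile above), so the AND wraps it around. If that slot is occupied,
       search for a free one below. */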
1913 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1914 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1915 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1916 {
1917 unsigned i;
1918 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1919 {
1920 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1921 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1922 {
1923 pPool->idxFreeDirtyPage = idxFree;
1924 break;
1925 }
1926 }
1927 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1928 }
1929
1930 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1931
1932 /*
1933 * Clear all references to this shadow table. See @bugref{7298}.
1934 */
1935 pgmPoolTrackClearPageUsers(pPool, pPage);
1936}
1937# endif /* !IN_RING3 */
1938
1939
1940/**
1941 * Check if the specified page is dirty (not write monitored)
1942 *
1943 * @returns true if dirty, false if write monitored.
1944 * @param pVM The cross context VM structure.
1945 * @param GCPhys Guest physical address
1946 */
1947bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1948{
1949 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1950 PGM_LOCK_ASSERT_OWNER(pVM);
1951 if (!pPool->cDirtyPages)
1952 return false;
1953
1954 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1955
1956 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1957 {
1958 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1959 {
1960 PPGMPOOLPAGE pPage;
1961 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1962
1963 pPage = &pPool->aPages[idxPage];
1964 if (pPage->GCPhys == GCPhys)
1965 return true;
1966 }
1967 }
1968 return false;
1969}
1970
1971
1972/**
1973 * Reset all dirty pages by reinstating page monitoring.
1974 *
1975 * @param pVM The cross context VM structure.
1976 */
1977void pgmPoolResetDirtyPages(PVM pVM)
1978{
1979 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1980 PGM_LOCK_ASSERT_OWNER(pVM);
1981 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1982
1983 if (!pPool->cDirtyPages)
1984 return;
1985
1986 Log(("pgmPoolResetDirtyPages\n"));
1987 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1988 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1989
1990 pPool->idxFreeDirtyPage = 0;
1991 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1992 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1993 {
1994 unsigned i;
1995 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1996 {
1997 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1998 {
1999 pPool->idxFreeDirtyPage = i;
2000 break;
2001 }
2002 }
2003 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2004 }
2005
2006 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
2007 return;
2008}
2009
2010
2011/**
2012 * Invalidate the PT entry for the specified page
2013 *
2014 * @param pVM The cross context VM structure.
2015 * @param GCPtrPage Guest page to invalidate
2016 */
2017void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
2018{
2019 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2020 PGM_LOCK_ASSERT_OWNER(pVM);
2021 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2022
2023 if (!pPool->cDirtyPages)
2024 return;
2025
2026 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2027 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2028 {
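        /* Currently nothing is done per slot; the dirty entries are left untouched and
           only the invalidation request is logged above. */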
2029 }
2030}
2031
2032
2033/**
2034 * Flush the dirty page table at the given guest physical address, reinstating its write monitoring.
2035 *
2036 * @param pVM The cross context VM structure.
2037 * @param GCPhysPT Physical address of the page table
2038 */
2039void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
2040{
2041 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2042 PGM_LOCK_ASSERT_OWNER(pVM);
2043 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2044 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2045
2046 if (!pPool->cDirtyPages)
2047 return;
2048
2049 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2050
2051 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2052 {
2053 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
2054 {
2055 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
2056
2057 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2058 if (pPage->GCPhys == GCPhysPT)
2059 {
2060 idxDirtyPage = i;
2061 break;
2062 }
2063 }
2064 }
2065
2066 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2067 {
2068 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2069 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2070 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
2071 {
2072 unsigned i;
2073 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2074 {
2075 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2076 {
2077 pPool->idxFreeDirtyPage = i;
2078 break;
2079 }
2080 }
2081 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2082 }
2083 }
2084}
2085
2086# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2087
2088/**
2089 * Inserts a page into the GCPhys hash table.
2090 *
2091 * @param pPool The pool.
2092 * @param pPage The page.
2093 */
2094DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2095{
2096 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2097 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2098 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2099 pPage->iNext = pPool->aiHash[iHash];
2100 pPool->aiHash[iHash] = pPage->idx;
2101}
2102
2103
2104/**
2105 * Removes a page from the GCPhys hash table.
2106 *
2107 * @param pPool The pool.
2108 * @param pPage The page.
2109 */
2110DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2111{
2112 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2113 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2114 if (pPool->aiHash[iHash] == pPage->idx)
2115 pPool->aiHash[iHash] = pPage->iNext;
2116 else
2117 {
2118 uint16_t iPrev = pPool->aiHash[iHash];
2119 for (;;)
2120 {
2121 const int16_t i = pPool->aPages[iPrev].iNext;
2122 if (i == pPage->idx)
2123 {
2124 pPool->aPages[iPrev].iNext = pPage->iNext;
2125 break;
2126 }
2127 if (i == NIL_PGMPOOL_IDX)
2128 {
2129 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2130 break;
2131 }
2132 iPrev = i;
2133 }
2134 }
2135 pPage->iNext = NIL_PGMPOOL_IDX;
2136}
2137
2138
2139/**
2140 * Frees up one cache page.
2141 *
2142 * @returns VBox status code.
2143 * @retval VINF_SUCCESS on success.
2144 * @param pPool The pool.
2145 * @param iUser The user index.
2146 */
2147static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2148{
2149#ifndef IN_RC
2150 const PVM pVM = pPool->CTX_SUFF(pVM);
2151#endif
2152 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
2153 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2154
2155 /*
2156 * Select one page from the tail of the age list.
2157 */
2158 PPGMPOOLPAGE pPage;
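    /* The age list is kept in LRU order: iAgeHead is the most recently used page
       (pgmPoolCacheUsed and pgmPoolCacheInsert put pages there) and iAgeTail the least
       recently used, so eviction candidates are taken from the tail. */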
2159 for (unsigned iLoop = 0; ; iLoop++)
2160 {
2161 uint16_t iToFree = pPool->iAgeTail;
2162 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2163 iToFree = pPool->aPages[iToFree].iAgePrev;
2164/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2165 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2166 {
2167 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2168 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2169 {
2170 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2171 continue;
2172 iToFree = i;
2173 break;
2174 }
2175 }
2176*/
2177 Assert(iToFree != iUser);
2178 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2179 pPage = &pPool->aPages[iToFree];
2180
2181 /*
2182 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2183 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2184 */
2185 if ( !pgmPoolIsPageLocked(pPage)
2186 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2187 break;
2188 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2189 pgmPoolCacheUsed(pPool, pPage);
2190 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2191 }
2192
2193 /*
2194 * Found a usable page, flush it and return.
2195 */
2196 int rc = pgmPoolFlushPage(pPool, pPage);
2197 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2198 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2199 if (rc == VINF_SUCCESS)
2200 PGM_INVL_ALL_VCPU_TLBS(pVM);
2201 return rc;
2202}
2203
2204
2205/**
2206 * Checks if a kind mismatch is really a page being reused
2207 * or if it's just normal remappings.
2208 *
2209 * @returns true if reused and the cached page (enmKind1) should be flushed
2210 * @returns false if not reused.
2211 * @param enmKind1 The kind of the cached page.
2212 * @param enmKind2 The kind of the requested page.
2213 */
2214static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2215{
2216 switch (enmKind1)
2217 {
2218 /*
2219 * Never reuse them. There is no remapping in non-paging mode.
2220 */
2221 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2222 case PGMPOOLKIND_32BIT_PD_PHYS:
2223 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2224 case PGMPOOLKIND_PAE_PD_PHYS:
2225 case PGMPOOLKIND_PAE_PDPT_PHYS:
2226 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2227 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2228 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2229 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2230 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2231 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2232 return false;
2233
2234 /*
2235 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2236 */
2237 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2238 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2239 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2240 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2241 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2242 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2243 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2244 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2245 case PGMPOOLKIND_32BIT_PD:
2246 case PGMPOOLKIND_PAE_PDPT:
2247 switch (enmKind2)
2248 {
2249 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2250 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2251 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2252 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2253 case PGMPOOLKIND_64BIT_PML4:
2254 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2255 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2256 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2257 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2258 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2259 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2260 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2261 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2262 return true;
2263 default:
2264 return false;
2265 }
2266
2267 /*
2268 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2269 */
2270 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2271 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2272 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2273 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2274 case PGMPOOLKIND_64BIT_PML4:
2275 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2276 switch (enmKind2)
2277 {
2278 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2279 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2280 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2281 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2282 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2283 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2284 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2285 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2286 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2287 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2288 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2289 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2290 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2291 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2292 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2293 return true;
2294 default:
2295 return false;
2296 }
2297
2298 /*
2299 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2300 */
2301 case PGMPOOLKIND_ROOT_NESTED:
2302 return false;
2303
2304 default:
2305 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2306 }
2307}
2308
2309
2310/**
2311 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2312 *
2313 * @returns VBox status code.
2314 * @retval VINF_PGM_CACHED_PAGE on success.
2315 * @retval VERR_FILE_NOT_FOUND if not found.
2316 * @param pPool The pool.
2317 * @param GCPhys The GC physical address of the page we're gonna shadow.
2318 * @param enmKind The kind of mapping.
2319 * @param enmAccess Access type for the mapping (only relevant for big pages)
2320 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2321 * @param iUser The shadow page pool index of the user table. This is
2322 * NIL_PGMPOOL_IDX for root pages.
2323 * @param iUserTable The index into the user table (shadowed). Ignored if
2324 * root page
2325 * @param ppPage Where to store the pointer to the page.
2326 */
2327static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2328 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2329{
2330 /*
2331 * Look up the GCPhys in the hash.
2332 */
2333 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2334 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2335 if (i != NIL_PGMPOOL_IDX)
2336 {
2337 do
2338 {
2339 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2340 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2341 if (pPage->GCPhys == GCPhys)
2342 {
2343 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2344 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2345 && pPage->fA20Enabled == fA20Enabled)
2346 {
2347 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2348 * doesn't flush it in case there are no more free use records.
2349 */
2350 pgmPoolCacheUsed(pPool, pPage);
2351
2352 int rc = VINF_SUCCESS;
2353 if (iUser != NIL_PGMPOOL_IDX)
2354 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2355 if (RT_SUCCESS(rc))
2356 {
2357 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2358 *ppPage = pPage;
2359 if (pPage->cModifications)
2360 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2361 STAM_COUNTER_INC(&pPool->StatCacheHits);
2362 return VINF_PGM_CACHED_PAGE;
2363 }
2364 return rc;
2365 }
2366
2367 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2368 {
2369 /*
2370 * The kind is different. In some cases we should now flush the page
2371 * as it has been reused, but in most cases this is normal remapping
2372 * of PDs as PT or big pages using the GCPhys field in a slightly
2373 * different way than the other kinds.
2374 */
2375 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2376 {
2377 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2378 pgmPoolFlushPage(pPool, pPage);
2379 break;
2380 }
2381 }
2382 }
2383
2384 /* next */
2385 i = pPage->iNext;
2386 } while (i != NIL_PGMPOOL_IDX);
2387 }
2388
2389 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2390 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2391 return VERR_FILE_NOT_FOUND;
2392}
2393
2394
2395/**
2396 * Inserts a page into the cache.
2397 *
2398 * @param pPool The pool.
2399 * @param pPage The cached page.
2400 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2401 */
2402static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2403{
2404 /*
2405 * Insert into the GCPhys hash if the page is fit for that.
2406 */
2407 Assert(!pPage->fCached);
2408 if (fCanBeCached)
2409 {
2410 pPage->fCached = true;
2411 pgmPoolHashInsert(pPool, pPage);
2412 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2413 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2414 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2415 }
2416 else
2417 {
2418 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2419 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2420 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2421 }
2422
2423 /*
2424 * Insert at the head of the age list.
2425 */
2426 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2427 pPage->iAgeNext = pPool->iAgeHead;
2428 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2429 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2430 else
2431 pPool->iAgeTail = pPage->idx;
2432 pPool->iAgeHead = pPage->idx;
2433}
2434
2435
2436/**
2437 * Flushes a cached page.
2438 *
2439 * @param pPool The pool.
2440 * @param pPage The cached page.
2441 */
2442static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2443{
2444 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2445
2446 /*
2447 * Remove the page from the hash.
2448 */
2449 if (pPage->fCached)
2450 {
2451 pPage->fCached = false;
2452 pgmPoolHashRemove(pPool, pPage);
2453 }
2454 else
2455 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2456
2457 /*
2458 * Remove it from the age list.
2459 */
2460 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2461 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2462 else
2463 pPool->iAgeTail = pPage->iAgePrev;
2464 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2465 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2466 else
2467 pPool->iAgeHead = pPage->iAgeNext;
2468 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2469 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2470}
2471
2472
2473/**
2474 * Looks for pages sharing the monitor.
2475 *
2476 * @returns Pointer to the head page.
2477 * @returns NULL if not found.
2478 * @param pPool The Pool
2479 * @param pNewPage The page which is going to be monitored.
2480 */
2481static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2482{
2483 /*
2484 * Look up the GCPhys in the hash.
2485 */
2486 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2487 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2488 if (i == NIL_PGMPOOL_IDX)
2489 return NULL;
2490 do
2491 {
2492 PPGMPOOLPAGE pPage = &pPool->aPages[i];
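        /* pPage->GCPhys may carry a sub-page offset for some kinds (e.g. the PAE PDs
           shadowing quarters of a 32-bit PD), so an unsigned difference check is used to
           match any entry belonging to the same guest page. */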
2493 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2494 && pPage != pNewPage)
2495 {
2496 switch (pPage->enmKind)
2497 {
2498 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2499 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2500 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2501 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2502 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2503 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2504 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2505 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2506 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2507 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2508 case PGMPOOLKIND_64BIT_PML4:
2509 case PGMPOOLKIND_32BIT_PD:
2510 case PGMPOOLKIND_PAE_PDPT:
2511 {
2512 /* find the head */
2513 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2514 {
2515 Assert(pPage->iMonitoredPrev != pPage->idx);
2516 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2517 }
2518 return pPage;
2519 }
2520
2521 /* ignore, no monitoring. */
2522 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2523 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2524 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2525 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2526 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2527 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2528 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2529 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2530 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2531 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2532 case PGMPOOLKIND_ROOT_NESTED:
2533 case PGMPOOLKIND_PAE_PD_PHYS:
2534 case PGMPOOLKIND_PAE_PDPT_PHYS:
2535 case PGMPOOLKIND_32BIT_PD_PHYS:
2536 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2537 break;
2538 default:
2539 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2540 }
2541 }
2542
2543 /* next */
2544 i = pPage->iNext;
2545 } while (i != NIL_PGMPOOL_IDX);
2546 return NULL;
2547}
2548
2549
2550/**
2551 * Enables write monitoring of a guest page.
2552 *
2553 * @returns VBox status code.
2554 * @retval VINF_SUCCESS on success.
2555 * @param pPool The pool.
2556 * @param pPage The cached page.
2557 */
2558static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2559{
2560 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2561
2562 /*
2563 * Filter out the relevant kinds.
2564 */
2565 switch (pPage->enmKind)
2566 {
2567 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2568 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2569 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2570 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2571 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2572 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2573 case PGMPOOLKIND_64BIT_PML4:
2574 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2575 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2576 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2577 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2578 case PGMPOOLKIND_32BIT_PD:
2579 case PGMPOOLKIND_PAE_PDPT:
2580 break;
2581
2582 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2583 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2584 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2585 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2586 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2587 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2588 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2589 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2590 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2591 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2592 case PGMPOOLKIND_ROOT_NESTED:
2593 /* Nothing to monitor here. */
2594 return VINF_SUCCESS;
2595
2596 case PGMPOOLKIND_32BIT_PD_PHYS:
2597 case PGMPOOLKIND_PAE_PDPT_PHYS:
2598 case PGMPOOLKIND_PAE_PD_PHYS:
2599 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2600 /* Nothing to monitor here. */
2601 return VINF_SUCCESS;
2602 default:
2603 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2604 }
2605
2606 /*
2607 * Install handler.
2608 */
2609 int rc;
2610 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2611 if (pPageHead)
2612 {
2613 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2614 Assert(pPageHead->iMonitoredPrev != pPage->idx);
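        /* Another shadow page already monitors this guest page; the physical access handler
           is shared, so simply link this page into the existing monitored chain instead of
           registering a second handler. */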
2615
2616#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2617 if (pPageHead->fDirty)
2618 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2619#endif
2620
2621 pPage->iMonitoredPrev = pPageHead->idx;
2622 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2623 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2624 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2625 pPageHead->iMonitoredNext = pPage->idx;
2626 rc = VINF_SUCCESS;
2627 }
2628 else
2629 {
2630 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2631 PVM pVM = pPool->CTX_SUFF(pVM);
2632 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2633 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2634 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2635 NIL_RTR3PTR /*pszDesc*/);
2636 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2637 * the heap size should suffice. */
2638 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2639 PVMCPU pVCpu = VMMGetCpu(pVM);
2640 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2641 }
2642 pPage->fMonitored = true;
2643 return rc;
2644}
2645
2646
2647/**
2648 * Disables write monitoring of a guest page.
2649 *
2650 * @returns VBox status code.
2651 * @retval VINF_SUCCESS on success.
2652 * @param pPool The pool.
2653 * @param pPage The cached page.
2654 */
2655static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2656{
2657 /*
2658 * Filter out the relevant kinds.
2659 */
2660 switch (pPage->enmKind)
2661 {
2662 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2663 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2664 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2665 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2666 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2667 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2668 case PGMPOOLKIND_64BIT_PML4:
2669 case PGMPOOLKIND_32BIT_PD:
2670 case PGMPOOLKIND_PAE_PDPT:
2671 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2672 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2673 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2674 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2675 break;
2676
2677 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2678 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2679 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2680 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2681 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2682 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2683 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2684 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2685 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2686 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2687 case PGMPOOLKIND_ROOT_NESTED:
2688 case PGMPOOLKIND_PAE_PD_PHYS:
2689 case PGMPOOLKIND_PAE_PDPT_PHYS:
2690 case PGMPOOLKIND_32BIT_PD_PHYS:
2691 /* Nothing to monitor here. */
2692 Assert(!pPage->fMonitored);
2693 return VINF_SUCCESS;
2694
2695 default:
2696 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2697 }
2698 Assert(pPage->fMonitored);
2699
2700 /*
2701 * Remove the page from the monitored list or uninstall it if last.
2702 */
2703 const PVM pVM = pPool->CTX_SUFF(pVM);
2704 int rc;
2705 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2706 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2707 {
2708 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2709 {
2710 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2711 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2712 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, MMHyperCCToR3(pVM, pNewHead),
2713 MMHyperCCToR0(pVM, pNewHead), MMHyperCCToRC(pVM, pNewHead));
2714
2715 AssertFatalRCSuccess(rc);
2716 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2717 }
2718 else
2719 {
2720 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2721 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2722 {
2723 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2724 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2725 }
2726 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2727 rc = VINF_SUCCESS;
2728 }
2729 }
2730 else
2731 {
2732 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2733 AssertFatalRC(rc);
2734 PVMCPU pVCpu = VMMGetCpu(pVM);
2735 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2736 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2737 }
2738 pPage->fMonitored = false;
2739
2740 /*
2741 * Remove it from the list of modified pages (if in it).
2742 */
2743 pgmPoolMonitorModifiedRemove(pPool, pPage);
2744
2745 return rc;
2746}
2747
2748
2749/**
2750 * Inserts the page into the list of modified pages.
2751 *
2752 * @param pPool The pool.
2753 * @param pPage The page.
2754 */
2755void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2756{
2757 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2758 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2759 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2760 && pPool->iModifiedHead != pPage->idx,
2761 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2762 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2763 pPool->iModifiedHead, pPool->cModifiedPages));
2764
2765 pPage->iModifiedNext = pPool->iModifiedHead;
2766 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2767 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2768 pPool->iModifiedHead = pPage->idx;
2769 pPool->cModifiedPages++;
2770#ifdef VBOX_WITH_STATISTICS
2771 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2772 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2773#endif
2774}
2775
2776
2777/**
2778 * Removes the page from the list of modified pages and resets the
2779 * modification counter.
2780 *
2781 * @param pPool The pool.
2782 * @param pPage The page which is believed to be in the list of modified pages.
2783 */
2784static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2785{
2786 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2787 if (pPool->iModifiedHead == pPage->idx)
2788 {
2789 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2790 pPool->iModifiedHead = pPage->iModifiedNext;
2791 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2792 {
2793 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2794 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2795 }
2796 pPool->cModifiedPages--;
2797 }
2798 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2799 {
2800 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2801 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2802 {
2803 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2804 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2805 }
2806 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2807 pPool->cModifiedPages--;
2808 }
2809 else
2810 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2811 pPage->cModifications = 0;
2812}
2813
2814
2815/**
2816 * Zaps the list of modified pages, resetting their modification counters in the process.
2817 *
2818 * @param pVM The cross context VM structure.
2819 */
2820static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2821{
2822 pgmLock(pVM);
2823 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2824 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2825
2826 unsigned cPages = 0; NOREF(cPages);
2827
2828#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2829 pgmPoolResetDirtyPages(pVM);
2830#endif
2831
2832 uint16_t idx = pPool->iModifiedHead;
2833 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2834 while (idx != NIL_PGMPOOL_IDX)
2835 {
2836 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2837 idx = pPage->iModifiedNext;
2838 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2839 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2840 pPage->cModifications = 0;
2841 Assert(++cPages);
2842 }
2843 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2844 pPool->cModifiedPages = 0;
2845 pgmUnlock(pVM);
2846}
2847
2848
2849/**
2850 * Handle SyncCR3 pool tasks
2851 *
2852 * @returns VBox status code.
2853 * @retval VINF_SUCCESS on success.
2854 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2855 * @param pVCpu The cross context virtual CPU structure.
2856 * @remark Should only be used when monitoring is available, thus placed in
2857 * the PGMPOOL_WITH_MONITORING \#ifdef.
2858 */
2859int pgmPoolSyncCR3(PVMCPU pVCpu)
2860{
2861 PVM pVM = pVCpu->CTX_SUFF(pVM);
2862 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2863
2864 /*
2865 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2866 * Occasionally we will have to clear all the shadow page tables because we wanted
2867 * to monitor a page which was mapped by too many shadowed page tables. This operation
2868 * is sometimes referred to as a 'lightweight flush'.
2869 */
2870# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2871 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2872 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2873# else /* !IN_RING3 */
2874 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2875 {
2876 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2877 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2878
2879 /* Make sure all other VCPUs return to ring 3. */
2880 if (pVM->cCpus > 1)
2881 {
2882 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2883 PGM_INVL_ALL_VCPU_TLBS(pVM);
2884 }
2885 return VINF_PGM_SYNC_CR3;
2886 }
2887# endif /* !IN_RING3 */
2888 else
2889 {
2890 pgmPoolMonitorModifiedClearAll(pVM);
2891
2892 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2893 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2894 {
2895 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2896 return pgmPoolSyncCR3(pVCpu);
2897 }
2898 }
2899 return VINF_SUCCESS;
2900}
2901
2902
2903/**
2904 * Frees up at least one user entry.
2905 *
2906 * @returns VBox status code.
2907 * @retval VINF_SUCCESS on success.
2908 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2909 * @param pPool The pool.
2910 * @param iUser The user index.
2911 */
2912static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2913{
2914 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2915 /*
2916 * Just free cached pages in a braindead fashion.
2917 */
2918 /** @todo walk the age list backwards and free the first with usage. */
2919 int rc = VINF_SUCCESS;
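    /* Each pgmPoolCacheFreeOne call flushes a cached page, which returns that page's user
       records to the free list; keep evicting until iUserFreeHead points at a free entry. */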
2920 do
2921 {
2922 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2923 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2924 rc = rc2;
2925 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2926 return rc;
2927}
2928
2929
2930/**
2931 * Inserts a page into the cache.
2932 *
2933 * This will create a user node for the page, insert it into the GCPhys
2934 * hash, and insert it into the age list.
2935 *
2936 * @returns VBox status code.
2937 * @retval VINF_SUCCESS if successfully added.
2938 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2939 * @param pPool The pool.
2940 * @param pPage The cached page.
2941 * @param GCPhys The GC physical address of the page we're gonna shadow.
2942 * @param iUser The user index.
2943 * @param iUserTable The user table index.
2944 */
2945DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2946{
2947 int rc = VINF_SUCCESS;
2948 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2949
2950 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2951
2952 if (iUser != NIL_PGMPOOL_IDX)
2953 {
2954#ifdef VBOX_STRICT
2955 /*
2956 * Check that the entry doesn't already exist.
2957 */
2958 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2959 {
2960 uint16_t i = pPage->iUserHead;
2961 do
2962 {
2963 Assert(i < pPool->cMaxUsers);
2964 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2965 i = paUsers[i].iNext;
2966 } while (i != NIL_PGMPOOL_USER_INDEX);
2967 }
2968#endif
2969
2970 /*
2971 * Find a free user node.
2972 */
2973 uint16_t i = pPool->iUserFreeHead;
2974 if (i == NIL_PGMPOOL_USER_INDEX)
2975 {
2976 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2977 if (RT_FAILURE(rc))
2978 return rc;
2979 i = pPool->iUserFreeHead;
2980 }
2981
2982 /*
2983 * Unlink the user node from the free list,
2984 * initialize and insert it into the user list.
2985 */
2986 pPool->iUserFreeHead = paUsers[i].iNext;
2987 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2988 paUsers[i].iUser = iUser;
2989 paUsers[i].iUserTable = iUserTable;
2990 pPage->iUserHead = i;
2991 }
2992 else
2993 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2994
2995
2996 /*
2997 * Insert into cache and enable monitoring of the guest page if enabled.
2998 *
2999 * Until we implement caching of all levels, including the CR3 one, we'll
3000 * have to make sure we don't try to monitor & cache any recursive reuse of
3001 * a monitored CR3 page. Because all Windows versions do this, we'll
3002 * have to be able to do combined access monitoring, CR3 + PT and
3003 * PD + PT (guest PAE).
3004 *
3005 * Update:
3006 * We're now cooperating with the CR3 monitor if an uncachable page is found.
3007 */
3008 const bool fCanBeMonitored = true;
3009 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
3010 if (fCanBeMonitored)
3011 {
3012 rc = pgmPoolMonitorInsert(pPool, pPage);
3013 AssertRC(rc);
3014 }
3015 return rc;
3016}
3017
3018
3019/**
3020 * Adds a user reference to a page.
3021 *
3022 * This will move the page to the head of the
3023 *
3024 * @returns VBox status code.
3025 * @retval VINF_SUCCESS if successfully added.
3026 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3027 * @param pPool The pool.
3028 * @param pPage The cached page.
3029 * @param iUser The user index.
3030 * @param iUserTable The user table.
3031 */
3032static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3033{
3034 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3035 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3036 Assert(iUser != NIL_PGMPOOL_IDX);
3037
3038# ifdef VBOX_STRICT
3039 /*
3040 * Check that the entry doesn't already exist. We only allow multiple
3041 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3042 */
3043 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3044 {
3045 uint16_t i = pPage->iUserHead;
3046 do
3047 {
3048 Assert(i < pPool->cMaxUsers);
3049 /** @todo this assertion looks odd... Shouldn't it be && here? */
3050 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3051 i = paUsers[i].iNext;
3052 } while (i != NIL_PGMPOOL_USER_INDEX);
3053 }
3054# endif
3055
3056 /*
3057 * Allocate a user node.
3058 */
3059 uint16_t i = pPool->iUserFreeHead;
3060 if (i == NIL_PGMPOOL_USER_INDEX)
3061 {
3062 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3063 if (RT_FAILURE(rc))
3064 return rc;
3065 i = pPool->iUserFreeHead;
3066 }
3067 pPool->iUserFreeHead = paUsers[i].iNext;
3068
3069 /*
3070 * Initialize the user node and insert it.
3071 */
3072 paUsers[i].iNext = pPage->iUserHead;
3073 paUsers[i].iUser = iUser;
3074 paUsers[i].iUserTable = iUserTable;
3075 pPage->iUserHead = i;
3076
3077# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3078 if (pPage->fDirty)
3079 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3080# endif
3081
3082 /*
3083 * Tell the cache to update its replacement stats for this page.
3084 */
3085 pgmPoolCacheUsed(pPool, pPage);
3086 return VINF_SUCCESS;
3087}
3088
3089
3090/**
3091 * Frees a user record associated with a page.
3092 *
3093 * This does not clear the entry in the user table, it simply returns the
3094 * user record to the chain of free records.
3095 *
3096 * @param pPool The pool.
3097 * @param pPage The shadow page.
3098 * @param iUser The shadow page pool index of the user table.
3099 * @param iUserTable The index into the user table (shadowed).
3100 *
3101 * @remarks Don't call this for root pages.
3102 */
3103static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3104{
3105 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3106 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3107 Assert(iUser != NIL_PGMPOOL_IDX);
3108
3109 /*
3110 * Unlink and free the specified user entry.
3111 */
3112
3113 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3114 uint16_t i = pPage->iUserHead;
3115 if ( i != NIL_PGMPOOL_USER_INDEX
3116 && paUsers[i].iUser == iUser
3117 && paUsers[i].iUserTable == iUserTable)
3118 {
3119 pPage->iUserHead = paUsers[i].iNext;
3120
3121 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3122 paUsers[i].iNext = pPool->iUserFreeHead;
3123 pPool->iUserFreeHead = i;
3124 return;
3125 }
3126
3127 /* General: Linear search. */
3128 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3129 while (i != NIL_PGMPOOL_USER_INDEX)
3130 {
3131 if ( paUsers[i].iUser == iUser
3132 && paUsers[i].iUserTable == iUserTable)
3133 {
3134 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3135 paUsers[iPrev].iNext = paUsers[i].iNext;
3136 else
3137 pPage->iUserHead = paUsers[i].iNext;
3138
3139 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3140 paUsers[i].iNext = pPool->iUserFreeHead;
3141 pPool->iUserFreeHead = i;
3142 return;
3143 }
3144 iPrev = i;
3145 i = paUsers[i].iNext;
3146 }
3147
3148 /* Fatal: didn't find it */
3149 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3150 iUser, iUserTable, pPage->GCPhys));
3151}
3152
3153
3154#if 0 /* unused */
3155/**
3156 * Gets the entry size of a shadow table.
3157 *
3158 * @param enmKind The kind of page.
3159 *
3160 * @returns The size of the entry in bytes. That is, 4 or 8.
3161 * @returns If the kind is not for a table, an assertion is raised and 0 is
3162 * returned.
3163 */
3164DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3165{
3166 switch (enmKind)
3167 {
3168 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3169 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3170 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3171 case PGMPOOLKIND_32BIT_PD:
3172 case PGMPOOLKIND_32BIT_PD_PHYS:
3173 return 4;
3174
3175 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3176 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3177 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3178 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3179 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3180 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3181 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3182 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3183 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3184 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3185 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3186 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3187 case PGMPOOLKIND_64BIT_PML4:
3188 case PGMPOOLKIND_PAE_PDPT:
3189 case PGMPOOLKIND_ROOT_NESTED:
3190 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3191 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3192 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3193 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3194 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3195 case PGMPOOLKIND_PAE_PD_PHYS:
3196 case PGMPOOLKIND_PAE_PDPT_PHYS:
3197 return 8;
3198
3199 default:
3200 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3201 }
3202}
3203#endif /* unused */
3204
3205#if 0 /* unused */
3206/**
3207 * Gets the entry size of a guest table.
3208 *
3209 * @param enmKind The kind of page.
3210 *
3211 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3212 * @returns If the kind is not for a table, an assertion is raised and 0 is
3213 * returned.
3214 */
3215DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3216{
3217 switch (enmKind)
3218 {
3219 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3220 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3221 case PGMPOOLKIND_32BIT_PD:
3222 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3223 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3224 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3225 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3226 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3227 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3228 return 4;
3229
3230 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3231 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3232 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3233 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3234 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3235 case PGMPOOLKIND_64BIT_PML4:
3236 case PGMPOOLKIND_PAE_PDPT:
3237 return 8;
3238
3239 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3240 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3241 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3242 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3243 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3244 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3245 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3246 case PGMPOOLKIND_ROOT_NESTED:
3247 case PGMPOOLKIND_PAE_PD_PHYS:
3248 case PGMPOOLKIND_PAE_PDPT_PHYS:
3249 case PGMPOOLKIND_32BIT_PD_PHYS:
3250 /** @todo can we return 0? (nobody is calling this...) */
3251 AssertFailed();
3252 return 0;
3253
3254 default:
3255 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3256 }
3257}
3258#endif /* unused */
3259
3260
3261/**
3262 * Checks one shadow page table entry for a mapping of a physical page.
3263 *
3264 * @returns true if the relevant PTEs were kept (merely updated), false if they were removed.
3265 *
3266 * @param pVM The cross context VM structure.
3267 * @param pPhysPage The guest page in question.
3268 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3269 * @param iShw The shadow page table.
3270 * @param iPte Page table entry index (must not be NIL_PGMPOOL_PHYSEXT_IDX_PTE).
3271 */
3272static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3273{
3274 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3275 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3276 bool fRet = false;
3277
3278 /*
3279 * Assert sanity.
3280 */
3281 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3282 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3283 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3284
3285 /*
3286 * Then, clear the actual mappings to the page in the shadow PT.
3287 */
3288 switch (pPage->enmKind)
3289 {
3290 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3291 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3292 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3293 {
3294 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3295 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3296 uint32_t u32AndMask = 0;
3297 uint32_t u32OrMask = 0;
3298
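 /* The masks decide the fate of the shadow PTE: if the entry is kept because
    no write handler is active (or it is temporarily disabled), the RW bit is
    restored via the OR mask; if write access is monitored, only the RW bit is
    stripped; a zero AND mask means the entry is zapped further down. */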
3299 if (!fFlushPTEs)
3300 {
3301 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3302 {
3303 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3304 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3305 u32OrMask = X86_PTE_RW;
3306 u32AndMask = UINT32_MAX;
3307 fRet = true;
3308 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3309 break;
3310
3311 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3312 u32OrMask = 0;
3313 u32AndMask = ~X86_PTE_RW;
3314 fRet = true;
3315 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3316 break;
3317 default:
3318 /* (shouldn't be here, will assert below) */
3319 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3320 break;
3321 }
3322 }
3323 else
3324 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3325
3326 /* Update the counter if we're removing references. */
3327 if (!u32AndMask)
3328 {
3329 Assert(pPage->cPresent);
3330 Assert(pPool->cPresent);
3331 pPage->cPresent--;
3332 pPool->cPresent--;
3333 }
3334
3335 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3336 {
3337 X86PTE Pte;
3338
3339 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3340 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3341 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3342 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3343
3344 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3345 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3346 return fRet;
3347 }
3348#ifdef LOG_ENABLED
3349 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3350 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3351 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3352 {
3353 Log(("i=%d cFound=%d\n", i, ++cFound));
3354 }
3355#endif
3356 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3357 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3358 break;
3359 }
3360
3361 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3362 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3363 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3364 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3365 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3366 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3367 {
3368 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3369 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3370 uint64_t u64OrMask = 0;
3371 uint64_t u64AndMask = 0;
3372
3373 if (!fFlushPTEs)
3374 {
3375 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3376 {
3377 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3378 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3379 u64OrMask = X86_PTE_RW;
3380 u64AndMask = UINT64_MAX;
3381 fRet = true;
3382 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3383 break;
3384
3385 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3386 u64OrMask = 0;
3387 u64AndMask = ~(uint64_t)X86_PTE_RW;
3388 fRet = true;
3389 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3390 break;
3391
3392 default:
3393 /* (shouldn't be here, will assert below) */
3394 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3395 break;
3396 }
3397 }
3398 else
3399 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3400
3401 /* Update the counter if we're removing references. */
3402 if (!u64AndMask)
3403 {
3404 Assert(pPage->cPresent);
3405 Assert(pPool->cPresent);
3406 pPage->cPresent--;
3407 pPool->cPresent--;
3408 }
3409
3410 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3411 {
3412 X86PTEPAE Pte;
3413
3414 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3415 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3416 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3417 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3418
3419 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3420 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3421 return fRet;
3422 }
3423#ifdef LOG_ENABLED
3424 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3425 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3426 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3427 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3428 Log(("i=%d cFound=%d\n", i, ++cFound));
3429#endif
3430 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3431 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3432 break;
3433 }
3434
3435#ifdef PGM_WITH_LARGE_PAGES
3436 /* Large page case only. */
3437 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3438 {
3439 Assert(pVM->pgm.s.fNestedPaging);
3440
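 /* A 2 MB EPT PDE maps the entire large page; when it matches it is simply
    cleared, there is no keep/update variant for large pages. */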
3441 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3442 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3443
3444 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3445 {
3446 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3447 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3448 pPD->a[iPte].u = 0;
3449 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3450
3451 /* Update the counter as we're removing references. */
3452 Assert(pPage->cPresent);
3453 Assert(pPool->cPresent);
3454 pPage->cPresent--;
3455 pPool->cPresent--;
3456
3457 return fRet;
3458 }
3459# ifdef LOG_ENABLED
3460 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3461 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3462 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3463 Log(("i=%d cFound=%d\n", i, ++cFound));
3464# endif
3465 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3466 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3467 break;
3468 }
3469
3470 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3471 case PGMPOOLKIND_PAE_PD_PHYS:
3472 {
3473 Assert(pVM->pgm.s.fNestedPaging);
3474
3475 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3476 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3477
3478 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3479 {
3480 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3481 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3482 pPD->a[iPte].u = 0;
3483 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3484
3485 /* Update the counter as we're removing references. */
3486 Assert(pPage->cPresent);
3487 Assert(pPool->cPresent);
3488 pPage->cPresent--;
3489 pPool->cPresent--;
3490 return fRet;
3491 }
3492# ifdef LOG_ENABLED
3493 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3494 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3495 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3496 Log(("i=%d cFound=%d\n", i, ++cFound));
3497# endif
3498 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3499 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3500 break;
3501 }
3502#endif /* PGM_WITH_LARGE_PAGES */
3503
3504 default:
3505 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3506 }
3507
3508 /* not reached. */
3509#ifndef _MSC_VER
3510 return fRet;
3511#endif
3512}
3513
3514
3515/**
3516 * Scans one shadow page table for mappings of a physical page.
3517 *
3518 * @param pVM The cross context VM structure.
3519 * @param pPhysPage The guest page in question.
3520 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3521 * @param iShw The shadow page table.
3522 */
3523static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3524{
3525 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3526
3527 /* We should only come here when there's only one reference to this physical page. */
3528 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3529
3530 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3531 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3532 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3533 if (!fKeptPTEs)
3534 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3535 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3536}
3537
3538
3539/**
3540 * Flushes a list of shadow page tables mapping the same physical page.
3541 *
3542 * @param pVM The cross context VM structure.
3543 * @param pPhysPage The guest page in question.
3544 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3545 * @param iPhysExt The physical cross reference extent list to flush.
3546 */
3547static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3548{
3549 PGM_LOCK_ASSERT_OWNER(pVM);
3550 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3551 bool fKeepList = false;
3552
3553 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3554 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3555
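 /* Walk the extent chain: entries whose shadow PTEs were actually removed are
    cleared, and if nothing was kept the whole chain is returned to the free
    list and the page's tracking data is reset. */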
3556 const uint16_t iPhysExtStart = iPhysExt;
3557 PPGMPOOLPHYSEXT pPhysExt;
3558 do
3559 {
3560 Assert(iPhysExt < pPool->cMaxPhysExts);
3561 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3562 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3563 {
3564 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3565 {
3566 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3567 if (!fKeptPTEs)
3568 {
3569 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3570 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3571 }
3572 else
3573 fKeepList = true;
3574 }
3575 }
3576 /* next */
3577 iPhysExt = pPhysExt->iNext;
3578 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3579
3580 if (!fKeepList)
3581 {
3582 /* insert the list into the free list and clear the ram range entry. */
3583 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3584 pPool->iPhysExtFreeHead = iPhysExtStart;
3585 /* Invalidate the tracking data. */
3586 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3587 }
3588
3589 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3590}
3591
3592
3593/**
3594 * Flushes all shadow page table mappings of the given guest page.
3595 *
3596 * This is typically called when the host page backing the guest one has been
3597 * replaced or when the page protection was changed due to a guest access
3598 * caught by the monitoring.
3599 *
3600 * @returns VBox status code.
3601 * @retval VINF_SUCCESS if all references have been successfully cleared.
3602 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3603 * pool cleaning. FF and sync flags are set.
3604 *
3605 * @param pVM The cross context VM structure.
3606 * @param GCPhysPage GC physical address of the page in question
3607 * @param pPhysPage The guest page in question.
3608 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3609 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3610 * flushed; it is NOT touched if this isn't necessary.
3611 * The caller MUST initialize this to @a false.
3612 */
3613int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3614{
3615 PVMCPU pVCpu = VMMGetCpu(pVM);
3616 pgmLock(pVM);
3617 int rc = VINF_SUCCESS;
3618
3619#ifdef PGM_WITH_LARGE_PAGES
3620 /* Is this page part of a large page? */
3621 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3622 {
3623 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3624 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3625
3626 /* Fetch the large page base. */
3627 PPGMPAGE pLargePage;
3628 if (GCPhysBase != GCPhysPage)
3629 {
3630 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3631 AssertFatal(pLargePage);
3632 }
3633 else
3634 pLargePage = pPhysPage;
3635
3636 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3637
3638 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3639 {
3640 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3641 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3642 pVM->pgm.s.cLargePagesDisabled++;
3643
3644 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3645 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3646
3647 *pfFlushTLBs = true;
3648 pgmUnlock(pVM);
3649 return rc;
3650 }
3651 }
3652#else
3653 NOREF(GCPhysPage);
3654#endif /* PGM_WITH_LARGE_PAGES */
3655
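 /* The 16-bit tracking word packs a reference count and an index: with a
    single reference the index is the shadow page table's pool index, while a
    count of PGMPOOL_TD_CREFS_PHYSEXT means the index refers to a chain of
    physical cross reference extents. PGMPOOL_TD_IDX_OVERFLOWED marks pages
    with too many references to track, forcing the slow full scan below. */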
3656 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3657 if (u16)
3658 {
3659 /*
3660 * The zero page is currently screwing up the tracking and we'll
3661 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3662 * is defined, zero pages won't normally be mapped. Some kind of solution
3663 * will be needed for this problem of course, but it will have to wait...
3664 */
3665 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3666 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3667 rc = VINF_PGM_GCPHYS_ALIASED;
3668 else
3669 {
3670# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3671 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3672 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3673 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3674# endif
3675
3676 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3677 {
3678 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3679 pgmPoolTrackFlushGCPhysPT(pVM,
3680 pPhysPage,
3681 fFlushPTEs,
3682 PGMPOOL_TD_GET_IDX(u16));
3683 }
3684 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3685 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3686 else
3687 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3688 *pfFlushTLBs = true;
3689
3690# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3691 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3692# endif
3693 }
3694 }
3695
3696 if (rc == VINF_PGM_GCPHYS_ALIASED)
3697 {
3698 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3699 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3700 rc = VINF_PGM_SYNC_CR3;
3701 }
3702 pgmUnlock(pVM);
3703 return rc;
3704}
3705
3706
3707/**
3708 * Scans all shadow page tables for mappings of a physical page.
3709 *
3710 * This may be slow, but it's most likely more efficient than cleaning
3711 * out the entire page pool / cache.
3712 *
3713 * @returns VBox status code.
3714 * @retval VINF_SUCCESS if all references have been successfully cleared.
3715 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3716 * a page pool cleaning.
3717 *
3718 * @param pVM The cross context VM structure.
3719 * @param pPhysPage The guest page in question.
3720 */
3721int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3722{
3723 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3724 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3725 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3726 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3727
3728 /*
3729 * There is a limit to what makes sense.
3730 */
3731 if ( pPool->cPresent > 1024
3732 && pVM->cCpus == 1)
3733 {
3734 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3735 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3736 return VINF_PGM_GCPHYS_ALIASED;
3737 }
3738
3739 /*
3740 * Iterate all the pages until we've encountered all that are in use.
3741 * This is a simple but not quite optimal solution.
3742 */
3743 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3744 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3745 unsigned cLeft = pPool->cUsedPages;
3746 unsigned iPage = pPool->cCurPages;
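 /* Walk the pool backwards. cLeft bounds the number of in-use pages that can
    still be encountered, giving an early exit, and within each page table the
    per-page cPresent count lets the inner loop stop once all present entries
    have been seen. */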
3747 while (--iPage >= PGMPOOL_IDX_FIRST)
3748 {
3749 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3750 if ( pPage->GCPhys != NIL_RTGCPHYS
3751 && pPage->cPresent)
3752 {
3753 switch (pPage->enmKind)
3754 {
3755 /*
3756 * We only care about shadow page tables.
3757 */
3758 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3759 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3760 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3761 {
3762 unsigned cPresent = pPage->cPresent;
3763 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3764 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3765 if (pPT->a[i].n.u1Present)
3766 {
3767 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3768 {
3769 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3770 pPT->a[i].u = 0;
3771
3772 /* Update the counter as we're removing references. */
3773 Assert(pPage->cPresent);
3774 Assert(pPool->cPresent);
3775 pPage->cPresent--;
3776 pPool->cPresent--;
3777 }
3778 if (!--cPresent)
3779 break;
3780 }
3781 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3782 break;
3783 }
3784
3785 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3786 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3787 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3788 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3789 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3790 {
3791 unsigned cPresent = pPage->cPresent;
3792 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3793 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3794 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3795 {
3796 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3797 {
3798 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3799 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3800
3801 /* Update the counter as we're removing references. */
3802 Assert(pPage->cPresent);
3803 Assert(pPool->cPresent);
3804 pPage->cPresent--;
3805 pPool->cPresent--;
3806 }
3807 if (!--cPresent)
3808 break;
3809 }
3810 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3811 break;
3812 }
3813#ifndef IN_RC
3814 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3815 {
3816 unsigned cPresent = pPage->cPresent;
3817 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3818 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3819 if (pPT->a[i].n.u1Present)
3820 {
3821 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3822 {
3823 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3824 pPT->a[i].u = 0;
3825
3826 /* Update the counter as we're removing references. */
3827 Assert(pPage->cPresent);
3828 Assert(pPool->cPresent);
3829 pPage->cPresent--;
3830 pPool->cPresent--;
3831 }
3832 if (!--cPresent)
3833 break;
3834 }
3835 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3836 break;
3837 }
3838#endif
3839 }
3840 if (!--cLeft)
3841 break;
3842 }
3843 }
3844
3845 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3846 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3847
3848 /*
3849 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3850 */
3851 if (pPool->cPresent > 1024)
3852 {
3853 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3854 return VINF_PGM_GCPHYS_ALIASED;
3855 }
3856
3857 return VINF_SUCCESS;
3858}
3859
3860
3861/**
3862 * Clears the user entry in a user table.
3863 *
3864 * This is used to remove all references to a page when flushing it.
3865 */
3866static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3867{
3868 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3869 Assert(pUser->iUser < pPool->cCurPages);
3870 uint32_t iUserTable = pUser->iUserTable;
3871
3872 /*
3873 * Map the user page. Ignore references made by fictitious pages.
3874 */
3875 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3876 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3877 union
3878 {
3879 uint64_t *pau64;
3880 uint32_t *pau32;
3881 } u;
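 /* The user table is accessed through this union so that the entry can be
    zeroed as a 32-bit or a 64-bit value, depending on the kind of the owning
    page (see the switch below). */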
3882 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3883 {
3884 Assert(!pUserPage->pvPageR3);
3885 return;
3886 }
3887 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3888
3889
3890 /* Safety precaution in case we change the paging for other modes too in the future. */
3891 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3892
3893#ifdef VBOX_STRICT
3894 /*
3895 * Some sanity checks.
3896 */
3897 switch (pUserPage->enmKind)
3898 {
3899 case PGMPOOLKIND_32BIT_PD:
3900 case PGMPOOLKIND_32BIT_PD_PHYS:
3901 Assert(iUserTable < X86_PG_ENTRIES);
3902 break;
3903 case PGMPOOLKIND_PAE_PDPT:
3904 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3905 case PGMPOOLKIND_PAE_PDPT_PHYS:
3906 Assert(iUserTable < 4);
3907 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3908 break;
3909 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3910 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3911 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3912 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3913 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3914 case PGMPOOLKIND_PAE_PD_PHYS:
3915 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3916 break;
3917 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3918 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3919 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3920 break;
3921 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3922 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3923 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3924 break;
3925 case PGMPOOLKIND_64BIT_PML4:
3926 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3927 /* GCPhys >> PAGE_SHIFT is the index here */
3928 break;
3929 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3930 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3931 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3932 break;
3933
3934 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3935 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3936 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3937 break;
3938
3939 case PGMPOOLKIND_ROOT_NESTED:
3940 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3941 break;
3942
3943 default:
3944 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3945 break;
3946 }
3947#endif /* VBOX_STRICT */
3948
3949 /*
3950 * Clear the entry in the user page.
3951 */
3952 switch (pUserPage->enmKind)
3953 {
3954 /* 32-bit entries */
3955 case PGMPOOLKIND_32BIT_PD:
3956 case PGMPOOLKIND_32BIT_PD_PHYS:
3957 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3958 break;
3959
3960 /* 64-bit entries */
3961 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3962 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3963 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3964 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3965 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3966#ifdef IN_RC
3967 /*
3968 * In 32-bit PAE mode we *must* invalidate the TLB when changing a
3969 * PDPT entry; the CPU fetches them only during cr3 load, so any
3970 * non-present PDPT will continue to cause page faults.
3971 */
3972 ASMReloadCR3();
3973 /* no break */
3974#endif
3975 case PGMPOOLKIND_PAE_PD_PHYS:
3976 case PGMPOOLKIND_PAE_PDPT_PHYS:
3977 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3978 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3979 case PGMPOOLKIND_64BIT_PML4:
3980 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3981 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3982 case PGMPOOLKIND_PAE_PDPT:
3983 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3984 case PGMPOOLKIND_ROOT_NESTED:
3985 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3986 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3987 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3988 break;
3989
3990 default:
3991 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3992 }
3993 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3994}
3995
3996
3997/**
3998 * Clears all users of a page.
3999 */
4000static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4001{
4002 /*
4003 * Free all the user records.
4004 */
4005 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
4006
4007 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4008 uint16_t i = pPage->iUserHead;
4009 while (i != NIL_PGMPOOL_USER_INDEX)
4010 {
4011 /* Clear the entry in the user table. */
4012 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
4013
4014 /* Free it. */
4015 const uint16_t iNext = paUsers[i].iNext;
4016 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4017 paUsers[i].iNext = pPool->iUserFreeHead;
4018 pPool->iUserFreeHead = i;
4019
4020 /* Next. */
4021 i = iNext;
4022 }
4023 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4024}
4025
4026
4027/**
4028 * Allocates a new physical cross reference extent.
4029 *
4030 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
4031 * @param pVM The cross context VM structure.
4032 * @param piPhysExt Where to store the phys ext index.
4033 */
4034PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
4035{
4036 PGM_LOCK_ASSERT_OWNER(pVM);
4037 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4038 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4039 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4040 {
4041 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4042 return NULL;
4043 }
4044 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4045 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4046 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4047 *piPhysExt = iPhysExt;
4048 return pPhysExt;
4049}
4050
4051
4052/**
4053 * Frees a physical cross reference extent.
4054 *
4055 * @param pVM The cross context VM structure.
4056 * @param iPhysExt The extent to free.
4057 */
4058void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
4059{
4060 PGM_LOCK_ASSERT_OWNER(pVM);
4061 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4062 Assert(iPhysExt < pPool->cMaxPhysExts);
4063 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4064 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4065 {
4066 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4067 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4068 }
4069 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4070 pPool->iPhysExtFreeHead = iPhysExt;
4071}
4072
4073
4074/**
4075 * Frees a list of physical cross reference extents.
4076 *
4077 * @param pVM The cross context VM structure.
4078 * @param iPhysExt The index of the head of the extent list to free.
4079 */
4080void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
4081{
4082 PGM_LOCK_ASSERT_OWNER(pVM);
4083 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4084
4085 const uint16_t iPhysExtStart = iPhysExt;
4086 PPGMPOOLPHYSEXT pPhysExt;
4087 do
4088 {
4089 Assert(iPhysExt < pPool->cMaxPhysExts);
4090 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4091 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4092 {
4093 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4094 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4095 }
4096
4097 /* next */
4098 iPhysExt = pPhysExt->iNext;
4099 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4100
4101 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4102 pPool->iPhysExtFreeHead = iPhysExtStart;
4103}
4104
4105
4106/**
4107 * Insert a reference into a list of physical cross reference extents.
4108 *
4109 * @returns The new tracking data for PGMPAGE.
4110 *
4111 * @param pVM The cross context VM structure.
4112 * @param iPhysExt The physical extent index of the list head.
4113 * @param iShwPT The shadow page table index.
4114 * @param iPte Page table entry
4115 *
4116 */
4117static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4118{
4119 PGM_LOCK_ASSERT_OWNER(pVM);
4120 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4121 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4122
4123 /*
4124 * Special common cases.
4125 */
4126 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4127 {
4128 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4129 paPhysExts[iPhysExt].apte[1] = iPte;
4130 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4131 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4132 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4133 }
4134 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4135 {
4136 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4137 paPhysExts[iPhysExt].apte[2] = iPte;
4138 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4139 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4140 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4141 }
4142 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4143
4144 /*
4145 * General treatment.
4146 */
4147 const uint16_t iPhysExtStart = iPhysExt;
4148 unsigned cMax = 15;
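 /* Each extent holds up to three references (aidx/apte pairs). Walk at most
    cMax extents looking for a free slot; if none is found within that limit,
    return the whole list to the free pool and mark the page as overflowed so
    it is handled by the slow path instead. */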
4149 for (;;)
4150 {
4151 Assert(iPhysExt < pPool->cMaxPhysExts);
4152 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4153 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4154 {
4155 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4156 paPhysExts[iPhysExt].apte[i] = iPte;
4157 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4158 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4159 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4160 }
4161 if (!--cMax)
4162 {
4163 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4164 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4165 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4166 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4167 }
4168
4169 /* advance */
4170 iPhysExt = paPhysExts[iPhysExt].iNext;
4171 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4172 break;
4173 }
4174
4175 /*
4176 * Add another extent to the list.
4177 */
4178 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4179 if (!pNew)
4180 {
4181 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4182 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4183 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4184 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4185 }
4186 pNew->iNext = iPhysExtStart;
4187 pNew->aidx[0] = iShwPT;
4188 pNew->apte[0] = iPte;
4189 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4190 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4191}
4192
4193
4194/**
4195 * Add a reference to a guest physical page where extents are in use.
4196 *
4197 * @returns The new tracking data for PGMPAGE.
4198 *
4199 * @param pVM The cross context VM structure.
4200 * @param pPhysPage Pointer to the aPages entry in the ram range.
4201 * @param u16 The ram range flags (top 16-bits).
4202 * @param iShwPT The shadow page table index.
4203 * @param iPte Page table entry
4204 */
4205uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4206{
4207 pgmLock(pVM);
4208 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4209 {
4210 /*
4211 * Convert to extent list.
4212 */
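 /* The single reference recorded directly in the tracking word becomes
    slot 0 of the new extent and the new reference becomes slot 1. */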
4213 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4214 uint16_t iPhysExt;
4215 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4216 if (pPhysExt)
4217 {
4218 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4219 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4220 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4221 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4222 pPhysExt->aidx[1] = iShwPT;
4223 pPhysExt->apte[1] = iPte;
4224 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4225 }
4226 else
4227 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4228 }
4229 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4230 {
4231 /*
4232 * Insert into the extent list.
4233 */
4234 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4235 }
4236 else
4237 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4238 pgmUnlock(pVM);
4239 return u16;
4240}
4241
4242
4243/**
4244 * Clear references to guest physical memory.
4245 *
4246 * @param pPool The pool.
4247 * @param pPage The page.
4248 * @param pPhysPage Pointer to the aPages entry in the ram range.
4249 * @param iPte Shadow PTE index
4250 */
4251void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4252{
4253 PVM pVM = pPool->CTX_SUFF(pVM);
4254 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4255 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4256
4257 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4258 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4259 {
4260 pgmLock(pVM);
4261
4262 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4263 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4264 do
4265 {
4266 Assert(iPhysExt < pPool->cMaxPhysExts);
4267
4268 /*
4269 * Look for the shadow page and check if it's all freed.
4270 */
4271 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4272 {
4273 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4274 && paPhysExts[iPhysExt].apte[i] == iPte)
4275 {
4276 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4277 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4278
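 /* If any slot of this extent is still in use, keep the extent in the
    chain and we're done. */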
4279 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4280 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4281 {
4282 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4283 pgmUnlock(pVM);
4284 return;
4285 }
4286
4287 /* we can free the node. */
4288 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4289 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4290 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4291 {
4292 /* lonely node */
4293 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4294 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4295 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4296 }
4297 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4298 {
4299 /* head */
4300 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4301 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4302 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4303 }
4304 else
4305 {
4306 /* in list */
4307 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4308 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4309 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4310 }
4311 iPhysExt = iPhysExtNext;
4312 pgmUnlock(pVM);
4313 return;
4314 }
4315 }
4316
4317 /* next */
4318 iPhysExtPrev = iPhysExt;
4319 iPhysExt = paPhysExts[iPhysExt].iNext;
4320 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4321
4322 pgmUnlock(pVM);
4323 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4324 }
4325 else /* nothing to do */
4326 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4327}
4328
4329/**
4330 * Clear references to guest physical memory.
4331 *
4332 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4333 * physical address is assumed to be correct, so the linear search can be
4334 * skipped and we can assert at an earlier point.
4335 *
4336 * @param pPool The pool.
4337 * @param pPage The page.
4338 * @param HCPhys The host physical address corresponding to the guest page.
4339 * @param GCPhys The guest physical address corresponding to HCPhys.
4340 * @param iPte Shadow PTE index
4341 */
4342static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4343{
4344 /*
4345 * Lookup the page and check if it checks out before derefing it.
4346 */
4347 PVM pVM = pPool->CTX_SUFF(pVM);
4348 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4349 if (pPhysPage)
4350 {
4351 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4352#ifdef LOG_ENABLED
4353 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4354 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4355#endif
4356 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4357 {
4358 Assert(pPage->cPresent);
4359 Assert(pPool->cPresent);
4360 pPage->cPresent--;
4361 pPool->cPresent--;
4362 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4363 return;
4364 }
4365
4366 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4367 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4368 }
4369 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4370}
4371
4372
4373/**
4374 * Clear references to guest physical memory.
4375 *
4376 * @param pPool The pool.
4377 * @param pPage The page.
4378 * @param HCPhys The host physical address corresponding to the guest page.
4379 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4380 * @param iPte Shadow pte index
4381 */
4382void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4383{
4384 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4385
4386 /*
4387 * Try the hint first.
4388 */
4389 RTHCPHYS HCPhysHinted;
4390 PVM pVM = pPool->CTX_SUFF(pVM);
4391 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4392 if (pPhysPage)
4393 {
4394 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4395 Assert(HCPhysHinted);
4396 if (HCPhysHinted == HCPhys)
4397 {
4398 Assert(pPage->cPresent);
4399 Assert(pPool->cPresent);
4400 pPage->cPresent--;
4401 pPool->cPresent--;
4402 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4403 return;
4404 }
4405 }
4406 else
4407 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4408
4409 /*
4410 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4411 */
4412 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4413 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
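 /* Walk every RAM range and compare the host physical address of each page
    until the one backing HCPhys is found. */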
4414 while (pRam)
4415 {
4416 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4417 while (iPage-- > 0)
4418 {
4419 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4420 {
4421 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4422 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4423 Assert(pPage->cPresent);
4424 Assert(pPool->cPresent);
4425 pPage->cPresent--;
4426 pPool->cPresent--;
4427 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4428 return;
4429 }
4430 }
4431 pRam = pRam->CTX_SUFF(pNext);
4432 }
4433
4434 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4435}
4436
4437
4438/**
4439 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4440 *
4441 * @param pPool The pool.
4442 * @param pPage The page.
4443 * @param pShwPT The shadow page table (mapping of the page).
4444 * @param pGstPT The guest page table.
4445 */
4446DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4447{
4448 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
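 /* The A20 gate state decides whether bit 20 is masked off the guest physical
    address before it is used as a dereference hint. */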
4449 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4450 {
4451 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4452 if (pShwPT->a[i].n.u1Present)
4453 {
4454 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4455 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4456 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4457 if (!pPage->cPresent)
4458 break;
4459 }
4460 }
4461}
4462
4463
4464/**
4465 * Clear references to guest physical memory in a PAE / 32-bit page table.
4466 *
4467 * @param pPool The pool.
4468 * @param pPage The page.
4469 * @param pShwPT The shadow page table (mapping of the page).
4470 * @param pGstPT The guest page table (just a half one).
4471 */
4472DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4473{
4474 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4475 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4476 {
4477 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4478 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4479 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4480 {
4481 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4482 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4483 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4484 if (!pPage->cPresent)
4485 break;
4486 }
4487 }
4488}
4489
4490
4491/**
4492 * Clear references to guest physical memory in a PAE / PAE page table.
4493 *
4494 * @param pPool The pool.
4495 * @param pPage The page.
4496 * @param pShwPT The shadow page table (mapping of the page).
4497 * @param pGstPT The guest page table.
4498 */
4499DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4500{
4501 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4502 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4503 {
4504 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4505 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4506 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4507 {
4508 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4509 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4510 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4511 if (!pPage->cPresent)
4512 break;
4513 }
4514 }
4515}
4516
4517
4518/**
4519 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4520 *
4521 * @param pPool The pool.
4522 * @param pPage The page.
4523 * @param pShwPT The shadow page table (mapping of the page).
4524 */
4525DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4526{
4527 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4528 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4529 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4530 {
4531 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4532 if (pShwPT->a[i].n.u1Present)
4533 {
4534 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4535 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4536 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4537 if (!pPage->cPresent)
4538 break;
4539 }
4540 }
4541}
4542
4543
4544/**
4545 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4546 *
4547 * @param pPool The pool.
4548 * @param pPage The page.
4549 * @param pShwPT The shadow page table (mapping of the page).
4550 */
4551DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4552{
4553 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4554 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4555 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4556 {
4557 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4558 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4559 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4560 {
4561 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4562 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4563 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4564 if (!pPage->cPresent)
4565 break;
4566 }
4567 }
4568}
4569
4570
4571/**
4572 * Clear references to shadowed pages in an EPT page table.
4573 *
4574 * @param pPool The pool.
4575 * @param pPage The page.
4576 * @param pShwPT The shadow page table (mapping of the page).
4578 */
4579DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4580{
4581 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4582 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4583 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4584 {
4585 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4586 if (pShwPT->a[i].n.u1Present)
4587 {
4588 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4589 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4590 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4591 if (!pPage->cPresent)
4592 break;
4593 }
4594 }
4595}
4596
4597
4598/**
4599 * Clear references to shadowed pages in a 32-bit page directory.
4600 *
4601 * @param pPool The pool.
4602 * @param pPage The page.
4603 * @param pShwPD The shadow page directory (mapping of the page).
4604 */
4605DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4606{
4607 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4608 {
4609 if ( pShwPD->a[i].n.u1Present
4610 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4611 )
4612 {
4613 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4614 if (pSubPage)
4615 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4616 else
4617 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4618 }
4619 }
4620}
4621
4622
4623/**
4624 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4625 *
4626 * @param pPool The pool.
4627 * @param pPage The page.
4628 * @param pShwPD The shadow page directory (mapping of the page).
4629 */
4630DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4631{
4632 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4633 {
4634 if ( pShwPD->a[i].n.u1Present
4635 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4636 {
4637#ifdef PGM_WITH_LARGE_PAGES
4638 if (pShwPD->a[i].b.u1Size)
4639 {
4640 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4641 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4642 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4643 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4644 i);
4645 }
4646 else
4647#endif
4648 {
4649 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4650 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4651 if (pSubPage)
4652 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4653 else
4654 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4655 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4656 }
4657 }
4658 }
4659}
4660
4661
4662/**
4663 * Clear references to shadowed pages in a PAE page directory pointer table.
4664 *
4665 * @param pPool The pool.
4666 * @param pPage The page.
4667 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4668 */
4669DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4670{
4671 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4672 {
4673 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4674 if ( pShwPDPT->a[i].n.u1Present
4675 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4676 )
4677 {
4678 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4679 if (pSubPage)
4680 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4681 else
4682 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4683 }
4684 }
4685}
4686
4687
4688/**
4689 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4690 *
4691 * @param pPool The pool.
4692 * @param pPage The page.
4693 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4694 */
4695DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4696{
4697 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4698 {
4699 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4700 if (pShwPDPT->a[i].n.u1Present)
4701 {
4702 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4703 if (pSubPage)
4704 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4705 else
4706 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4707 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4708 }
4709 }
4710}
4711
4712
4713/**
4714 * Clear references to shadowed pages in a 64-bit level 4 page table.
4715 *
4716 * @param pPool The pool.
4717 * @param pPage The page.
4718 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4719 */
4720DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4721{
4722 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4723 {
4724 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4725 if (pShwPML4->a[i].n.u1Present)
4726 {
4727 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4728 if (pSubPage)
4729 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4730 else
4731 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4732 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4733 }
4734 }
4735}
4736
4737
4738/**
4739 * Clear references to shadowed pages in an EPT page directory.
4740 *
4741 * @param pPool The pool.
4742 * @param pPage The page.
4743 * @param pShwPD The shadow page directory (mapping of the page).
4744 */
4745DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4746{
4747 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4748 {
4749 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4750 if (pShwPD->a[i].n.u1Present)
4751 {
4752#ifdef PGM_WITH_LARGE_PAGES
4753 if (pShwPD->a[i].b.u1Size)
4754 {
4755 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4756 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4757 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4758 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4759 i);
4760 }
4761 else
4762#endif
4763 {
4764 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4765 if (pSubPage)
4766 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4767 else
4768 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4769 }
4770 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4771 }
4772 }
4773}
4774
4775
4776/**
4777 * Clear references to shadowed pages in an EPT page directory pointer table.
4778 *
4779 * @param pPool The pool.
4780 * @param pPage The page.
4781 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4782 */
4783DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4784{
4785 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4786 {
4787 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4788 if (pShwPDPT->a[i].n.u1Present)
4789 {
4790 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4791 if (pSubPage)
4792 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4793 else
4794 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4795 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4796 }
4797 }
4798}
4799
4800
4801/**
4802 * Clears all references made by this page.
4803 *
4804 * This includes other shadow pages and GC physical addresses.
4805 *
4806 * @param pPool The pool.
4807 * @param pPage The page.
4808 */
4809static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4810{
4811 /*
4812 * Map the shadow page and take action according to the page kind.
4813 */
4814 PVM pVM = pPool->CTX_SUFF(pVM);
4815 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4816 switch (pPage->enmKind)
4817 {
4818 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4819 {
4820 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4821 void *pvGst;
4822 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4823 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4824 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4825 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4826 break;
4827 }
4828
4829 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4830 {
4831 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4832 void *pvGst;
4833 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4834 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4835 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4836 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4837 break;
4838 }
4839
4840 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4841 {
4842 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4843 void *pvGst;
4844 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4845 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4846 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4847 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4848 break;
4849 }
4850
4851 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4852 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4853 {
4854 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4855 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4856 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4857 break;
4858 }
4859
4860 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4861 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4862 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4863 {
4864 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4865 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4866 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4867 break;
4868 }
4869
4870 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4871 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4872 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4873 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4874 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4875 case PGMPOOLKIND_PAE_PD_PHYS:
4876 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4877 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4878 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4879 break;
4880
4881 case PGMPOOLKIND_32BIT_PD_PHYS:
4882 case PGMPOOLKIND_32BIT_PD:
4883 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4884 break;
4885
4886 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4887 case PGMPOOLKIND_PAE_PDPT:
4888 case PGMPOOLKIND_PAE_PDPT_PHYS:
4889 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4890 break;
4891
4892 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4893 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4894 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4895 break;
4896
4897 case PGMPOOLKIND_64BIT_PML4:
4898 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4899 break;
4900
4901 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4902 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4903 break;
4904
4905 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4906 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4907 break;
4908
4909 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4910 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4911 break;
4912
4913 default:
4914 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4915 }
4916
4917 /* Paranoia: clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4918 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4919 ASMMemZeroPage(pvShw);
4920 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4921 pPage->fZeroed = true;
4922 Assert(!pPage->cPresent);
4923 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4924}
4925
4926
4927/**
4928 * Flushes a pool page.
4929 *
4930 * This moves the page to the free list after removing all user references to it.
4931 *
4932 * @returns VBox status code.
4933 * @retval VINF_SUCCESS on success.
4934 * @param pPool The pool.
4935 * @param pPage The shadow page.
4936 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4937 */
4938int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4939{
4940 PVM pVM = pPool->CTX_SUFF(pVM);
4941 bool fFlushRequired = false;
4942
4943 int rc = VINF_SUCCESS;
4944 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4945 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4946 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4947
4948 /*
4949 * Reject any attempts at flushing any of the special root pages (shall
4950 * not happen).
4951 */
4952 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4953 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4954 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4955 VINF_SUCCESS);
4956
4957 pgmLock(pVM);
4958
4959 /*
4960 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4961 */
4962 if (pgmPoolIsPageLocked(pPage))
4963 {
4964 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4965 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4966 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4967 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4968 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4969 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4970 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4971 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4972 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4973 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4974 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4975 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4976 pgmUnlock(pVM);
4977 return VINF_SUCCESS;
4978 }
4979
4980#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4981 /* Start a subset so we won't run out of mapping space. */
4982 PVMCPU pVCpu = VMMGetCpu(pVM);
4983 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4984#endif
4985
4986 /*
4987 * Mark the page as being in need of an ASMMemZeroPage().
4988 */
4989 pPage->fZeroed = false;
4990
4991#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4992 if (pPage->fDirty)
4993 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4994#endif
4995
4996 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4997 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4998 fFlushRequired = true;
4999
5000 /*
5001 * Clear the page.
5002 */
5003 pgmPoolTrackClearPageUsers(pPool, pPage);
5004 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
5005 pgmPoolTrackDeref(pPool, pPage);
5006 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
5007
5008 /*
5009 * Flush it from the cache.
5010 */
5011 pgmPoolCacheFlushPage(pPool, pPage);
5012
5013#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
5014 /* Heavy stuff done. */
5015 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
5016#endif
5017
5018 /*
5019 * Deregister the monitoring.
5020 */
5021 if (pPage->fMonitored)
5022 rc = pgmPoolMonitorFlush(pPool, pPage);
5023
5024 /*
5025 * Free the page.
5026 */
5027 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
5028 pPage->iNext = pPool->iFreeHead;
5029 pPool->iFreeHead = pPage->idx;
5030 pPage->enmKind = PGMPOOLKIND_FREE;
5031 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5032 pPage->GCPhys = NIL_RTGCPHYS;
5033 pPage->fReusedFlushPending = false;
5034
5035 pPool->cUsedPages--;
5036
5037 /* Flush the TLBs of all VCPUs if required. */
5038 if ( fFlushRequired
5039 && fFlush)
5040 {
5041 PGM_INVL_ALL_VCPU_TLBS(pVM);
5042 }
5043
5044 pgmUnlock(pVM);
5045 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5046 return rc;
5047}
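
/*
 * Illustrative usage sketch: the fFlush parameter above should only be false
 * when the caller batches several flushes and does one combined TLB flush at
 * the end.  The helper name pgmPoolFlushPagesExample and the page array are
 * hypothetical, for illustration only.
 */
#if 0 /* example only */
static void pgmPoolFlushPagesExample(PPGMPOOL pPool, PPGMPOOLPAGE *papPages, unsigned cPages)
{
    PVM pVM = pPool->CTX_SUFF(pVM);
    for (unsigned i = 0; i < cPages; i++)
        pgmPoolFlushPage(pPool, papPages[i], false /* fFlush: defer the TLB flush */);
    PGM_INVL_ALL_VCPU_TLBS(pVM);    /* one combined flush instead of one per page */
}
#endif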
5048
5049
5050/**
5051 * Frees a usage of a pool page.
5052 *
5053 * The caller is responsible for updating the user table so that it no longer
5054 * references the shadow page.
5055 *
5056 * @param pPool The pool.
5057 * @param pPage The shadow page.
5058 * @param iUser The shadow page pool index of the user table.
5059 * NIL_PGMPOOL_IDX for root pages.
5060 * @param iUserTable The index into the user table (shadowed). Ignored if
5061 * root page.
5062 */
5063void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5064{
5065 PVM pVM = pPool->CTX_SUFF(pVM);
5066
5067 STAM_PROFILE_START(&pPool->StatFree, a);
5068 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5069 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5070 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5071
5072 pgmLock(pVM);
5073 if (iUser != NIL_PGMPOOL_IDX)
5074 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5075 if (!pPage->fCached)
5076 pgmPoolFlushPage(pPool, pPage);
5077 pgmUnlock(pVM);
5078 STAM_PROFILE_STOP(&pPool->StatFree, a);
5079}
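
/*
 * Illustrative usage sketch: a caller that already holds the PPGMPOOLPAGE
 * pointer frees the usage directly via pgmPoolFreeByPage(); with only the
 * host physical address of the shadow page it goes through pgmPoolFree()
 * further down, which looks the page up first.  The variable names are
 * hypothetical.
 */
#if 0 /* example only */
    pgmPoolFreeByPage(pPool, pShwPage, pUserPage->idx, iPdpte);    /* have the page pointer */
    pgmPoolFree(pVM, HCPhysShwPT, pUserPage->idx, iPde);           /* only have the HCPhys */
#endif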
5080
5081
5082/**
5083 * Makes more free pages available.
5084 *
5085 * @returns VBox status code.
5086 * @retval VINF_SUCCESS on success.
5087 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5088 *
5089 * @param pPool The pool.
5090 * @param enmKind Page table kind
5091 * @param iUser The user of the page.
5092 */
5093static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5094{
5095 PVM pVM = pPool->CTX_SUFF(pVM);
5096 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5097 NOREF(enmKind);
5098
5099 /*
5100 * If the pool isn't fully grown yet, expand it.
5101 */
5102 if ( pPool->cCurPages < pPool->cMaxPages
5103#if defined(IN_RC)
5104 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
5105 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5106 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
5107#endif
5108 )
5109 {
5110 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5111#ifdef IN_RING3
5112 int rc = PGMR3PoolGrow(pVM);
5113#else
5114 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
5115#endif
5116 if (RT_FAILURE(rc))
5117 return rc;
5118 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5119 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5120 return VINF_SUCCESS;
5121 }
5122
5123 /*
5124 * Free one cached page.
5125 */
5126 return pgmPoolCacheFreeOne(pPool, iUser);
5127}
5128
5129
5130/**
5131 * Allocates a page from the pool.
5132 *
5133 * This page may actually be a cached page and not in need of any processing
5134 * on the caller's part.
5135 *
5136 * @returns VBox status code.
5137 * @retval VINF_SUCCESS if a NEW page was allocated.
5138 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5139 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5140 *
5141 * @param pVM The cross context VM structure.
5142 * @param GCPhys The GC physical address of the page we're going to shadow.
5143 * For 4MB and 2MB PD entries, it's the first address the
5144 * shadow PT is covering.
5145 * @param enmKind The kind of mapping.
5146 * @param enmAccess Access type for the mapping (only relevant for big pages)
5147 * @param fA20Enabled Whether the A20 gate is enabled or not.
5148 * @param iUser The shadow page pool index of the user table. Root
5149 * pages should pass NIL_PGMPOOL_IDX.
5150 * @param iUserTable The index into the user table (shadowed). Ignored for
5151 * root pages (iUser == NIL_PGMPOOL_IDX).
5152 * @param fLockPage Lock the page
5153 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5154 */
5155int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5156 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5157{
5158 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5159 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5160 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5161 *ppPage = NULL;
5162 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5163 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5164 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5165
5166 pgmLock(pVM);
5167
5168 if (pPool->fCacheEnabled)
5169 {
5170 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5171 if (RT_SUCCESS(rc2))
5172 {
5173 if (fLockPage)
5174 pgmPoolLockPage(pPool, *ppPage);
5175 pgmUnlock(pVM);
5176 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5177 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5178 return rc2;
5179 }
5180 }
5181
5182 /*
5183 * Allocate a new one.
5184 */
5185 int rc = VINF_SUCCESS;
5186 uint16_t iNew = pPool->iFreeHead;
5187 if (iNew == NIL_PGMPOOL_IDX)
5188 {
5189 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5190 if (RT_FAILURE(rc))
5191 {
5192 pgmUnlock(pVM);
5193 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5194 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5195 return rc;
5196 }
5197 iNew = pPool->iFreeHead;
5198 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5199 }
5200
5201 /* unlink the free head */
5202 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5203 pPool->iFreeHead = pPage->iNext;
5204 pPage->iNext = NIL_PGMPOOL_IDX;
5205
5206 /*
5207 * Initialize it.
5208 */
5209 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5210 pPage->enmKind = enmKind;
5211 pPage->enmAccess = enmAccess;
5212 pPage->GCPhys = GCPhys;
5213 pPage->fA20Enabled = fA20Enabled;
5214 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5215 pPage->fMonitored = false;
5216 pPage->fCached = false;
5217 pPage->fDirty = false;
5218 pPage->fReusedFlushPending = false;
5219 pPage->cModifications = 0;
5220 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5221 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5222 pPage->cPresent = 0;
5223 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5224 pPage->idxDirtyEntry = 0;
5225 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5226 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5227 pPage->cLastAccessHandler = 0;
5228 pPage->cLocked = 0;
5229# ifdef VBOX_STRICT
5230 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5231# endif
5232
5233 /*
5234 * Insert into the tracking and cache. If this fails, free the page.
5235 */
5236 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5237 if (RT_FAILURE(rc3))
5238 {
5239 pPool->cUsedPages--;
5240 pPage->enmKind = PGMPOOLKIND_FREE;
5241 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5242 pPage->GCPhys = NIL_RTGCPHYS;
5243 pPage->iNext = pPool->iFreeHead;
5244 pPool->iFreeHead = pPage->idx;
5245 pgmUnlock(pVM);
5246 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5247 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5248 return rc3;
5249 }
5250
5251 /*
5252 * Commit the allocation, clear the page and return.
5253 */
5254#ifdef VBOX_WITH_STATISTICS
5255 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5256 pPool->cUsedPagesHigh = pPool->cUsedPages;
5257#endif
5258
5259 if (!pPage->fZeroed)
5260 {
5261 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5262 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5263 ASMMemZeroPage(pv);
5264 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5265 }
5266
5267 *ppPage = pPage;
5268 if (fLockPage)
5269 pgmPoolLockPage(pPool, pPage);
5270 pgmUnlock(pVM);
5271 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5272 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5273 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5274 return rc;
5275}
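
/*
 * Illustrative usage sketch: allocating a shadow PAE page table for a guest
 * page table and releasing that usage again when the owning PDE goes away.
 * The caller, the GCPhys value and the index variables are hypothetical.
 */
#if 0 /* example only */
static int pgmPoolAllocExample(PVM pVM, RTGCPHYS GCPhysGuestPT, uint16_t idxShwPD, uint32_t iPde)
{
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAlloc(pVM, GCPhysGuestPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
                          true /*fA20Enabled*/, idxShwPD, iPde, false /*fLockPage*/, &pShwPage);
    if (RT_FAILURE(rc))
        return rc;              /* e.g. VERR_PGM_POOL_FLUSHED when the pool had to be flushed. */
    /* rc == VINF_PGM_CACHED_PAGE: an existing shadow PT was reused;
       rc == VINF_SUCCESS:         a fresh, zeroed page was handed out. */

    /* ... hook pShwPage->Core.Key (the HCPhys) into the shadow PDE here ... */

    /* When the shadow PDE is torn down again, drop this usage: */
    pgmPoolFree(pVM, pShwPage->Core.Key, idxShwPD, iPde);
    return VINF_SUCCESS;
}
#endif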
5276
5277
5278/**
5279 * Frees a usage of a pool page.
5280 *
5281 * @param pVM The cross context VM structure.
5282 * @param HCPhys The HC physical address of the shadow page.
5283 * @param iUser The shadow page pool index of the user table.
5284 * NIL_PGMPOOL_IDX if root page.
5285 * @param iUserTable The index into the user table (shadowed). Ignored if
5286 * root page.
5287 */
5288void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5289{
5290 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5291 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5292 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5293}
5294
5295
5296/**
5297 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5298 *
5299 * @returns Pointer to the shadow page structure.
5300 * @param pPool The pool.
5301 * @param HCPhys The HC physical address of the shadow page.
5302 */
5303PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5304{
5305 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5306
5307 /*
5308 * Look up the page.
5309 */
5310 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5311
5312 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5313 return pPage;
5314}
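
/*
 * Illustrative usage sketch: translating a shadow table's host physical
 * address back to its pool page and then to a mapped pointer.  HCPhysShw is
 * hypothetical; the PGM lock must be held as asserted above.
 */
#if 0 /* example only */
    PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhysShw);  /* asserts fatally if unknown or free */
    void        *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
#endif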
5315
5316
5317/**
5318 * Internal worker for finding a page for debugging purposes, no assertions.
5319 *
5320 * @returns Pointer to the shadow page structure. NULL if not found.
5321 * @param pPool The pool.
5322 * @param HCPhys The HC physical address of the shadow page.
5323 */
5324PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5325{
5326 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5327 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5328}
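
/*
 * Illustrative usage sketch for debugging code: unlike pgmPoolGetPage() this
 * lookup tolerates unknown addresses.  HCPhys is hypothetical here.
 */
#if 0 /* example only */
    PPGMPOOLPAGE pPage = pgmPoolQueryPageForDbg(pPool, HCPhys);
    if (pPage)
        Log(("HCPhys=%RHp -> idx=%d kind=%s\n", HCPhys, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind)));
#endif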
5329
5330#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5331
5332/**
5333 * Flushes the specified page if it is present in the pool.
5334 *
5335 * @param pVM The cross context VM structure.
5336 * @param GCPhys Guest physical address of the page to flush
5337 */
5338void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5339{
5340 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5341
5342 VM_ASSERT_EMT(pVM);
5343
5344 /*
5345 * Look up the GCPhys in the hash.
5346 */
5347 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5348 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5349 if (i == NIL_PGMPOOL_IDX)
5350 return;
5351
5352 do
5353 {
5354 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5355 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5356 {
5357 switch (pPage->enmKind)
5358 {
5359 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5360 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5361 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5362 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5363 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5364 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5365 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5366 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5367 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5368 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5369 case PGMPOOLKIND_64BIT_PML4:
5370 case PGMPOOLKIND_32BIT_PD:
5371 case PGMPOOLKIND_PAE_PDPT:
5372 {
5373 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5374#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5375 if (pPage->fDirty)
5376 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5377 else
5378#endif
5379 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5380 Assert(!pgmPoolIsPageLocked(pPage));
5381 pgmPoolMonitorChainFlush(pPool, pPage);
5382 return;
5383 }
5384
5385 /* ignore, no monitoring. */
5386 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5387 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5388 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5389 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5390 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5391 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5392 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5393 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5394 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5395 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5396 case PGMPOOLKIND_ROOT_NESTED:
5397 case PGMPOOLKIND_PAE_PD_PHYS:
5398 case PGMPOOLKIND_PAE_PDPT_PHYS:
5399 case PGMPOOLKIND_32BIT_PD_PHYS:
5400 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5401 break;
5402
5403 default:
5404 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5405 }
5406 }
5407
5408 /* next */
5409 i = pPage->iNext;
5410 } while (i != NIL_PGMPOOL_IDX);
5411 return;
5412}
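
/*
 * Illustrative usage sketch: a ring-3 caller making sure no shadow page
 * table derived from a guest page survives before that page is replaced.
 * GCPhysGuestPT is hypothetical.
 */
#if 0 /* example only */
    VM_ASSERT_EMT(pVM);
    pgmPoolFlushPageByGCPhys(pVM, GCPhysGuestPT);   /* harmless no-op if nothing shadows it */
#endif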
5413
5414#endif /* IN_RING3 */
5415#ifdef IN_RING3
5416
5417/**
5418 * Reset CPU on hot plugging.
5419 *
5420 * @param pVM The cross context VM structure.
5421 * @param pVCpu The cross context virtual CPU structure.
5422 */
5423void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5424{
5425 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5426
5427 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5428 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5429 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5430}
5431
5432
5433/**
5434 * Flushes the entire cache.
5435 *
5436 * It will set the global CR3 sync forced-action flag (FF) on each VCPU and
5437 * assumes the caller is aware of this and will execute the CR3 flush.
5438 *
5439 * @param pVM The cross context VM structure.
5440 */
5441void pgmR3PoolReset(PVM pVM)
5442{
5443 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5444
5445 PGM_LOCK_ASSERT_OWNER(pVM);
5446 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5447 LogFlow(("pgmR3PoolReset:\n"));
5448
5449 /*
5450 * If there are no pages in the pool, there is nothing to do.
5451 */
5452 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5453 {
5454 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5455 return;
5456 }
5457
5458 /*
5459 * Exit the shadow mode since we're going to clear everything,
5460 * including the root page.
5461 */
5462 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5463 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5464
5465 /*
5466 * Nuke the free list and reinsert all pages into it.
5467 */
5468 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5469 {
5470 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5471
5472 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5473 if (pPage->fMonitored)
5474 pgmPoolMonitorFlush(pPool, pPage);
5475 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5476 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5477 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5478 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5479 pPage->GCPhys = NIL_RTGCPHYS;
5480 pPage->enmKind = PGMPOOLKIND_FREE;
5481 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5482 Assert(pPage->idx == i);
5483 pPage->iNext = i + 1;
5484 pPage->fA20Enabled = true;
5485 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5486 pPage->fSeenNonGlobal = false;
5487 pPage->fMonitored = false;
5488 pPage->fDirty = false;
5489 pPage->fCached = false;
5490 pPage->fReusedFlushPending = false;
5491 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5492 pPage->cPresent = 0;
5493 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5494 pPage->cModifications = 0;
5495 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5496 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5497 pPage->idxDirtyEntry = 0;
5498 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5499 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5500 pPage->cLastAccessHandler = 0;
5501 pPage->cLocked = 0;
5502#ifdef VBOX_STRICT
5503 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5504#endif
5505 }
5506 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5507 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5508 pPool->cUsedPages = 0;
5509
5510 /*
5511 * Zap and reinitialize the user records.
5512 */
5513 pPool->cPresent = 0;
5514 pPool->iUserFreeHead = 0;
5515 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5516 const unsigned cMaxUsers = pPool->cMaxUsers;
5517 for (unsigned i = 0; i < cMaxUsers; i++)
5518 {
5519 paUsers[i].iNext = i + 1;
5520 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5521 paUsers[i].iUserTable = 0xfffffffe;
5522 }
5523 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5524
5525 /*
5526 * Clear all the GCPhys links and rebuild the phys ext free list.
5527 */
5528 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5529 pRam;
5530 pRam = pRam->CTX_SUFF(pNext))
5531 {
5532 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5533 while (iPage-- > 0)
5534 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5535 }
5536
5537 pPool->iPhysExtFreeHead = 0;
5538 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5539 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5540 for (unsigned i = 0; i < cMaxPhysExts; i++)
5541 {
5542 paPhysExts[i].iNext = i + 1;
5543 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5544 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5545 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5546 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5547 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5548 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5549 }
5550 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5551
5552 /*
5553 * Just zap the modified list.
5554 */
5555 pPool->cModifiedPages = 0;
5556 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5557
5558 /*
5559 * Clear the GCPhys hash and the age list.
5560 */
5561 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5562 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5563 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5564 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5565
5566#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5567 /* Clear all dirty pages. */
5568 pPool->idxFreeDirtyPage = 0;
5569 pPool->cDirtyPages = 0;
5570 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5571 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5572#endif
5573
5574 /*
5575 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5576 */
5577 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5578 {
5579 /*
5580 * Re-enter the shadowing mode and assert Sync CR3 FF.
5581 */
5582 PVMCPU pVCpu = &pVM->aCpus[i];
5583 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5584 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5585 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5586 }
5587
5588 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5589}
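
/*
 * Illustrative usage sketch: a ring-3 reset path invoking the pool reset.
 * pgmR3ResetExample is hypothetical; the points of interest are the PGM lock
 * ownership asserted above and the CR3 resync forced on every VCPU.
 */
#if 0 /* example only */
static void pgmR3ResetExample(PVM pVM)
{
    pgmLock(pVM);               /* pgmR3PoolReset asserts ownership of the PGM lock */
    pgmR3PoolReset(pVM);
    pgmUnlock(pVM);
    /* Each VCPU now has VMCPU_FF_PGM_SYNC_CR3 set and will resync on the next world switch. */
}
#endif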
5590
5591#endif /* IN_RING3 */
5592
5593#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5594/**
5595 * Stringifies a PGMPOOLKIND value.
5596 */
5597static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5598{
5599 switch ((PGMPOOLKIND)enmKind)
5600 {
5601 case PGMPOOLKIND_INVALID:
5602 return "PGMPOOLKIND_INVALID";
5603 case PGMPOOLKIND_FREE:
5604 return "PGMPOOLKIND_FREE";
5605 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5606 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5607 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5608 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5609 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5610 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5611 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5612 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5613 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5614 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5615 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5616 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5617 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5618 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5619 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5620 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5621 case PGMPOOLKIND_32BIT_PD:
5622 return "PGMPOOLKIND_32BIT_PD";
5623 case PGMPOOLKIND_32BIT_PD_PHYS:
5624 return "PGMPOOLKIND_32BIT_PD_PHYS";
5625 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5626 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5627 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5628 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5629 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5630 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5631 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5632 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5633 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5634 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5635 case PGMPOOLKIND_PAE_PD_PHYS:
5636 return "PGMPOOLKIND_PAE_PD_PHYS";
5637 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5638 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5639 case PGMPOOLKIND_PAE_PDPT:
5640 return "PGMPOOLKIND_PAE_PDPT";
5641 case PGMPOOLKIND_PAE_PDPT_PHYS:
5642 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5643 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5644 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5645 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5646 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5647 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5648 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5649 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5650 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5651 case PGMPOOLKIND_64BIT_PML4:
5652 return "PGMPOOLKIND_64BIT_PML4";
5653 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5654 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5655 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5656 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5657 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5658 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5659 case PGMPOOLKIND_ROOT_NESTED:
5660 return "PGMPOOLKIND_ROOT_NESTED";
5661 }
5662 return "Unknown kind!";
5663}
5664#endif /* LOG_ENABLED || VBOX_STRICT */
5665