VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 65504

Last change on this file since 65504 was 65504, checked in by vboxsync, 8 years ago

PGM: Temporarily backed out r113092 & r113134.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 214.8 KB
1/* $Id: PGMAllPool.cpp 65504 2017-01-29 11:54:25Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hm_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*********************************************************************************************************************************
44* Internal Functions *
45*********************************************************************************************************************************/
46RT_C_DECLS_BEGIN
47#if 0 /* unused */
48DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
49DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
50#endif /* unused */
51static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
54static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
55#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
56static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
57#endif
58#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
59static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
60#endif
61
62int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
63PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
64void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
65void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
66
67RT_C_DECLS_END
68
69
70#if 0 /* unused */
71/**
72 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
73 *
74 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
75 * @param enmKind The page kind.
76 */
77DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
78{
79 switch (enmKind)
80 {
81 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
83 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
84 return true;
85 default:
86 return false;
87 }
88}
89#endif /* unused */
90
91
92/**
93 * Flushes a chain of pages sharing the same access monitor.
94 *
95 * @returns VBox status code suitable for scheduling.
96 * @param pPool The pool.
97 * @param pPage A page in the chain.
98 * @todo VBOXSTRICTRC
99 */
100int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
101{
102 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
103
104 /*
105 * Find the list head.
106 */
107 uint16_t idx = pPage->idx;
108 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
109 {
110 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
111 {
112 idx = pPage->iMonitoredPrev;
113 Assert(idx != pPage->idx);
114 pPage = &pPool->aPages[idx];
115 }
116 }
117
118 /*
119 * Iterate the list flushing each shadow page.
120 */
121 int rc = VINF_SUCCESS;
122 for (;;)
123 {
124 idx = pPage->iMonitoredNext;
125 Assert(idx != pPage->idx);
126 if (pPage->idx >= PGMPOOL_IDX_FIRST)
127 {
128 int rc2 = pgmPoolFlushPage(pPool, pPage);
129 AssertRC(rc2);
130 }
131 /* next */
132 if (idx == NIL_PGMPOOL_IDX)
133 break;
134 pPage = &pPool->aPages[idx];
135 }
136 return rc;
137}
138
139
140/**
141 * Wrapper for getting the current context pointer to the entry being modified.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pVM The cross context VM structure.
145 * @param pvDst Destination address
146 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
147 * on the context (e.g. \#PF in R0 & RC).
148 * @param GCPhysSrc The source guest physical address.
149 * @param cb Size of data to read
150 */
151DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
152{
153#if defined(IN_RING3)
154 NOREF(pVM); NOREF(GCPhysSrc);
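    /* In ring-3 the faulting page is already mapped, so read the guest entry directly;
       aligning pvSrc down to the entry size (cb is 4 or 8) yields the whole entry even
       when the write itself was misaligned. */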
155 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
156 return VINF_SUCCESS;
157#else
158 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
159 NOREF(pvSrc);
160 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
161#endif
162}
163
164
165/**
166 * Process shadow entries before they are changed by the guest.
167 *
168 * For PT entries we will clear them. For PD entries, we'll simply check
169 * for mapping conflicts and set the SyncCR3 FF if found.
170 *
171 * @param pVCpu The cross context virtual CPU structure.
172 * @param pPool The pool.
173 * @param pPage The head page.
174 * @param GCPhysFault The guest physical fault address.
175 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
176 * depending on the context (e.g. \#PF in R0 & RC).
177 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
178 */
179static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
180 void const *pvAddress, unsigned cbWrite)
181{
182 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
183 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
184 PVM pVM = pPool->CTX_SUFF(pVM);
185 NOREF(pVCpu);
186
187 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
188 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
189
190 for (;;)
191 {
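        /* View of the shadow page; which union member is valid depends on pPage->enmKind below. */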
192 union
193 {
194 void *pv;
195 PX86PT pPT;
196 PPGMSHWPTPAE pPTPae;
197 PX86PD pPD;
198 PX86PDPAE pPDPae;
199 PX86PDPT pPDPT;
200 PX86PML4 pPML4;
201 } uShw;
202
203 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
204 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
205
206 uShw.pv = NULL;
207 switch (pPage->enmKind)
208 {
209 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
210 {
211 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
212 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
213 const unsigned iShw = off / sizeof(X86PTE);
214 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
215 if (uShw.pPT->a[iShw].n.u1Present)
216 {
217 X86PTE GstPte;
218
219 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
220 AssertRC(rc);
221 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
222 pgmPoolTracDerefGCPhysHint(pPool, pPage,
223 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
224 GstPte.u & X86_PTE_PG_MASK,
225 iShw);
226 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
227 }
228 break;
229 }
230
231 /* page/2 sized */
232 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
233 {
234 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
235 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
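            /* This PAE shadow PT (512 entries) covers only half of the 32-bit guest PT
               (1024 x 4-byte entries); pPage->GCPhys carries the half-page bit, so writes
               to the other half are ignored. */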
236 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
237 {
238 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
239 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
240 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
241 {
242 X86PTE GstPte;
243 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
244 AssertRC(rc);
245
246 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PG_MASK));
247 pgmPoolTracDerefGCPhysHint(pPool, pPage,
248 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
249 GstPte.u & X86_PTE_PG_MASK,
250 iShw);
251 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
252 }
253 }
254 break;
255 }
256
257 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
258 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
259 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
260 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
261 {
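            /* A 32-bit guest PD (1024 entries) is shadowed by four PAE PDs of 512 entries each:
               guest PDE iGst lives in shadow PD iGst / 256 as the entry pair starting at (iGst % 256) * 2. */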
262 unsigned iGst = off / sizeof(X86PDE);
263 unsigned iShwPdpt = iGst / 256;
264 unsigned iShw = (iGst % 256) * 2;
265 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
266
267 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
268 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
269 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
270 {
271 for (unsigned i = 0; i < 2; i++)
272 {
273# ifdef VBOX_WITH_RAW_MODE_NOT_R0
274 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
275 {
276 Assert(pgmMapAreMappingsEnabled(pVM));
277 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
278 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
279 break;
280 }
281# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
282 if (uShw.pPDPae->a[iShw+i].n.u1Present)
283 {
284 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
285 pgmPoolFree(pVM,
286 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
287 pPage->idx,
288 iShw + i);
289 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
290 }
291
292 /* paranoia / a bit assumptive. */
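                    /* A misaligned write spilling past this 4-byte guest PDE also touches the next
                       guest PDE, which maps to the shadow entry pair two slots further on. */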
293 if ( (off & 3)
294 && (off & 3) + cbWrite > 4)
295 {
296 const unsigned iShw2 = iShw + 2 + i;
297 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
298 {
299# ifdef VBOX_WITH_RAW_MODE_NOT_R0
300 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
301 {
302 Assert(pgmMapAreMappingsEnabled(pVM));
303 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
304 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
305 break;
306 }
307# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
308 if (uShw.pPDPae->a[iShw2].n.u1Present)
309 {
310 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
311 pgmPoolFree(pVM,
312 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
313 pPage->idx,
314 iShw2);
315 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
316 }
317 }
318 }
319 }
320 }
321 break;
322 }
323
324 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
325 {
326 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
327 const unsigned iShw = off / sizeof(X86PTEPAE);
328 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
329 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
330 {
331 X86PTEPAE GstPte;
332 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
333 AssertRC(rc);
334
335 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
336 pgmPoolTracDerefGCPhysHint(pPool, pPage,
337 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
338 GstPte.u & X86_PTE_PAE_PG_MASK,
339 iShw);
340 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
341 }
342
343 /* paranoia / a bit assumptive. */
344 if ( (off & 7)
345 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
346 {
347 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
348 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
349
350 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
351 {
352 X86PTEPAE GstPte;
353 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
354 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
355 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
356 AssertRC(rc);
357 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
358 pgmPoolTracDerefGCPhysHint(pPool, pPage,
359 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
360 GstPte.u & X86_PTE_PAE_PG_MASK,
361 iShw2);
362 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
363 }
364 }
365 break;
366 }
367
368 case PGMPOOLKIND_32BIT_PD:
369 {
370 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
371 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
372
373 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
374 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
375# ifdef VBOX_WITH_RAW_MODE_NOT_R0
376 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
377 {
378 Assert(pgmMapAreMappingsEnabled(pVM));
379 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
380 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
381 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
382 break;
383 }
384 else
385# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
386 {
387 if (uShw.pPD->a[iShw].n.u1Present)
388 {
389 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
390 pgmPoolFree(pVM,
391 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
392 pPage->idx,
393 iShw);
394 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
395 }
396 }
397 /* paranoia / a bit assumptive. */
398 if ( (off & 3)
399 && (off & 3) + cbWrite > sizeof(X86PTE))
400 {
401 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
402 if ( iShw2 != iShw
403 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
404 {
405# ifdef VBOX_WITH_RAW_MODE_NOT_R0
406 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
407 {
408 Assert(pgmMapAreMappingsEnabled(pVM));
409 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
410 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
411 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
412 break;
413 }
414# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
415 if (uShw.pPD->a[iShw2].n.u1Present)
416 {
417 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
418 pgmPoolFree(pVM,
419 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
420 pPage->idx,
421 iShw2);
422 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
423 }
424 }
425 }
426#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
427 if ( uShw.pPD->a[iShw].n.u1Present
428 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
429 {
430 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
431# ifdef IN_RC /* TLB load - we're pushing things a bit... */
432 ASMProbeReadByte(pvAddress);
433# endif
434 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
435 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
436 }
437#endif
438 break;
439 }
440
441 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
442 {
443 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
444 const unsigned iShw = off / sizeof(X86PDEPAE);
445 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
446#ifdef VBOX_WITH_RAW_MODE_NOT_R0
447 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
448 {
449 Assert(pgmMapAreMappingsEnabled(pVM));
450 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
451 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
452 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
453 break;
454 }
455#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
456 /*
457 * Causes trouble when the guest uses a PDE to refer to the whole page table level
458 * structure. (Invalidate here; faults later on when it tries to change the page
459 * table entries -> recheck; probably only applies to the RC case.)
460 */
461#ifdef VBOX_WITH_RAW_MODE_NOT_R0
462 else
463#endif
464 {
465 if (uShw.pPDPae->a[iShw].n.u1Present)
466 {
467 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
468 pgmPoolFree(pVM,
469 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
470 pPage->idx,
471 iShw);
472 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
473 }
474 }
475 /* paranoia / a bit assumptive. */
476 if ( (off & 7)
477 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
478 {
479 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
480 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
481
482#ifdef VBOX_WITH_RAW_MODE_NOT_R0
483 if ( iShw2 != iShw
484 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
485 {
486 Assert(pgmMapAreMappingsEnabled(pVM));
487 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
488 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
489 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
490 break;
491 }
492 else
493#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
494 if (uShw.pPDPae->a[iShw2].n.u1Present)
495 {
496 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
497 pgmPoolFree(pVM,
498 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
499 pPage->idx,
500 iShw2);
501 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
502 }
503 }
504 break;
505 }
506
507 case PGMPOOLKIND_PAE_PDPT:
508 {
509 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
510 /*
511 * Hopefully this doesn't happen very often:
512 * - touching unused parts of the page
513 * - messing with the bits of pd pointers without changing the physical address
514 */
515 /* PDPT roots are not page aligned; 32 byte only! */
516 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
517
518 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
519 const unsigned iShw = offPdpt / sizeof(X86PDPE);
520 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
521 {
522# ifdef VBOX_WITH_RAW_MODE_NOT_R0
523 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
524 {
525 Assert(pgmMapAreMappingsEnabled(pVM));
526 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
527 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
528 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
529 break;
530 }
531 else
532# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
533 if (uShw.pPDPT->a[iShw].n.u1Present)
534 {
535 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
536 pgmPoolFree(pVM,
537 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
538 pPage->idx,
539 iShw);
540 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
541 }
542
543 /* paranoia / a bit assumptive. */
544 if ( (offPdpt & 7)
545 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
546 {
547 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
548 if ( iShw2 != iShw
549 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
550 {
551# ifdef VBOX_WITH_RAW_MODE_NOT_R0
552 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
553 {
554 Assert(pgmMapAreMappingsEnabled(pVM));
555 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
556 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
557 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
558 break;
559 }
560 else
561# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
562 if (uShw.pPDPT->a[iShw2].n.u1Present)
563 {
564 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
565 pgmPoolFree(pVM,
566 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
567 pPage->idx,
568 iShw2);
569 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
570 }
571 }
572 }
573 }
574 break;
575 }
576
577#ifndef IN_RC
578 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
579 {
580 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
581 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
582 const unsigned iShw = off / sizeof(X86PDEPAE);
583 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
584 if (uShw.pPDPae->a[iShw].n.u1Present)
585 {
586 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
587 pgmPoolFree(pVM,
588 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
589 pPage->idx,
590 iShw);
591 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
592 }
593 /* paranoia / a bit assumptive. */
594 if ( (off & 7)
595 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
596 {
597 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
598 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
599
600 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
601 if (uShw.pPDPae->a[iShw2].n.u1Present)
602 {
603 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
604 pgmPoolFree(pVM,
605 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
606 pPage->idx,
607 iShw2);
608 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
609 }
610 }
611 break;
612 }
613
614 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
615 {
616 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
617 /*
618 * Hopefully this doesn't happen very often:
619 * - messing with the bits of pd pointers without changing the physical address
620 */
621 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
622 const unsigned iShw = off / sizeof(X86PDPE);
623 if (uShw.pPDPT->a[iShw].n.u1Present)
624 {
625 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
626 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
627 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
628 }
629 /* paranoia / a bit assumptive. */
630 if ( (off & 7)
631 && (off & 7) + cbWrite > sizeof(X86PDPE))
632 {
633 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
634 if (uShw.pPDPT->a[iShw2].n.u1Present)
635 {
636 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
637 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
638 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
639 }
640 }
641 break;
642 }
643
644 case PGMPOOLKIND_64BIT_PML4:
645 {
646 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
647 /*
648 * Hopefully this doesn't happen very often:
649 * - messing with the bits of pd pointers without changing the physical address
650 */
651 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
652 const unsigned iShw = off / sizeof(X86PDPE);
653 if (uShw.pPML4->a[iShw].n.u1Present)
654 {
655 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
656 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
657 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
658 }
659 /* paranoia / a bit assumptive. */
660 if ( (off & 7)
661 && (off & 7) + cbWrite > sizeof(X86PDPE))
662 {
663 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
664 if (uShw.pPML4->a[iShw2].n.u1Present)
665 {
666 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
667 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
668 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
669 }
670 }
671 break;
672 }
673#endif /* !IN_RC */
674
675 default:
676 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
677 }
678 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
679
680 /* next */
681 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
682 return;
683 pPage = &pPool->aPages[pPage->iMonitoredNext];
684 }
685}
686
687# ifndef IN_RING3
688
689/**
690 * Checks if an access could be a fork operation in progress.
691 *
692 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
693 *
694 * @returns true if it's likely that we're forking, otherwise false.
695 * @param pPool The pool.
696 * @param pDis The disassembled instruction.
697 * @param offFault The access offset.
698 */
699DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
700{
701 /*
702 * i386 linux is using btr to clear X86_PTE_RW.
703 * The functions involved are (2.6.16 source inspection):
704 * clear_bit
705 * ptep_set_wrprotect
706 * copy_one_pte
707 * copy_pte_range
708 * copy_pmd_range
709 * copy_pud_range
710 * copy_page_range
711 * dup_mmap
712 * dup_mm
713 * copy_mm
714 * copy_process
715 * do_fork
716 */
717 if ( pDis->pCurInstr->uOpcode == OP_BTR
718 && !(offFault & 4)
719 /** @todo Validate that the bit index is X86_PTE_RW. */
720 )
721 {
722 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork)); RT_NOREF_PV(pPool);
723 return true;
724 }
725 return false;
726}
727
728
729/**
730 * Determine whether the page is likely to have been reused.
731 *
732 * @returns true if we consider the page as being reused for a different purpose.
733 * @returns false if we consider it to still be a paging page.
734 * @param pVM The cross context VM structure.
735 * @param pVCpu The cross context virtual CPU structure.
736 * @param pRegFrame Trap register frame.
737 * @param pDis The disassembly info for the faulting instruction.
738 * @param pvFault The fault address.
739 *
740 * @remark The REP prefix check is left to the caller because of STOSD/W.
741 */
742DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
743{
744#ifndef IN_RC
745 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
746 if ( HMHasPendingIrq(pVM)
747 && (pRegFrame->rsp - pvFault) < 32)
748 {
749 /* Fault caused by stack writes while trying to inject an interrupt event. */
750 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
751 return true;
752 }
753#else
754 NOREF(pVM); NOREF(pvFault);
755#endif
756
757 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
758
759 /* Non-supervisor mode write means it's used for something else. */
760 if (CPUMGetGuestCPL(pVCpu) == 3)
761 return true;
762
763 switch (pDis->pCurInstr->uOpcode)
764 {
765 /* call implies the actual push of the return address faulted */
766 case OP_CALL:
767 Log4(("pgmPoolMonitorIsReused: CALL\n"));
768 return true;
769 case OP_PUSH:
770 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
771 return true;
772 case OP_PUSHF:
773 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
774 return true;
775 case OP_PUSHA:
776 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
777 return true;
778 case OP_FXSAVE:
779 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
780 return true;
781 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
782 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
783 return true;
784 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
785 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
786 return true;
787 case OP_MOVSWD:
788 case OP_STOSWD:
789 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
790 && pRegFrame->rcx >= 0x40
791 )
792 {
793 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
794
795 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
796 return true;
797 }
798 break;
799
800 default:
801 /*
802 * Anything having ESP on the left side means stack writes.
803 */
804 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
805 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
806 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
807 {
808 Log4(("pgmPoolMonitorIsReused: ESP\n"));
809 return true;
810 }
811 break;
812 }
813
814 /*
815 * Page table updates are very very unlikely to be crossing page boundaries,
816 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
817 */
818 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
819 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
820 {
821 Log4(("pgmPoolMonitorIsReused: cross page write\n"));
822 return true;
823 }
824
825 /*
826 * Nobody does an unaligned 8 byte write to a page table, right.
827 */
828 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
829 {
830 Log4(("pgmPoolMonitorIsReused: Unaligned 8+ byte write\n"));
831 return true;
832 }
833
834 return false;
835}
836
837
838/**
839 * Flushes the page being accessed.
840 *
841 * @returns VBox status code suitable for scheduling.
842 * @param pVM The cross context VM structure.
843 * @param pVCpu The cross context virtual CPU structure.
844 * @param pPool The pool.
845 * @param pPage The pool page (head).
846 * @param pDis The disassembly of the write instruction.
847 * @param pRegFrame The trap register frame.
848 * @param GCPhysFault The fault address as guest physical address.
849 * @param pvFault The fault address.
850 * @todo VBOXSTRICTRC
851 */
852static int pgmPoolAccessPfHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
853 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
854{
855 NOREF(pVM); NOREF(GCPhysFault);
856
857 /*
858 * First, do the flushing.
859 */
860 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
861
862 /*
863 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
864 * Must do this in raw mode (!); XP boot will fail otherwise.
865 */
866 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
867 if (rc2 == VINF_SUCCESS)
868 { /* do nothing */ }
869 else if (rc2 == VINF_EM_RESCHEDULE)
870 {
871 if (rc == VINF_SUCCESS)
872 rc = VBOXSTRICTRC_VAL(rc2);
873#ifndef IN_RING3
874 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
875#endif
876 }
877 else if (rc2 == VERR_EM_INTERPRETER)
878 {
879#ifdef IN_RC
880 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
881 {
882 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
883 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->eip));
884 rc = VINF_SUCCESS;
885 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
886 }
887 else
888#endif
889 {
890 rc = VINF_EM_RAW_EMULATE_INSTR;
891 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
892 }
893 }
894 else if (RT_FAILURE_NP(rc2))
895 rc = VBOXSTRICTRC_VAL(rc2);
896 else
897 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
898
899 LogFlow(("pgmPoolAccessPfHandlerPT: returns %Rrc (flushed)\n", rc));
900 return rc;
901}
902
903
904/**
905 * Handles the STOSD write accesses.
906 *
907 * @returns VBox status code suitable for scheduling.
908 * @param pVM The cross context VM structure.
909 * @param pPool The pool.
910 * @param pPage The pool page (head).
911 * @param pDis The disassembly of the write instruction.
912 * @param pRegFrame The trap register frame.
913 * @param GCPhysFault The fault address as guest physical address.
914 * @param pvFault The fault address.
915 */
916DECLINLINE(int) pgmPoolAccessPfHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
917 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
918{
919 unsigned uIncrement = pDis->Param1.cb;
920 NOREF(pVM);
921
922 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
923 Assert(pRegFrame->rcx <= 0x20);
924
925#ifdef VBOX_STRICT
926 if (pDis->uOpMode == DISCPUMODE_32BIT)
927 Assert(uIncrement == 4);
928 else
929 Assert(uIncrement == 8);
930#endif
931
932 Log3(("pgmPoolAccessPfHandlerSTOSD\n"));
933
934 /*
935 * Increment the modification counter and insert it into the list
936 * of modified pages the first time.
937 */
938 if (!pPage->cModifications++)
939 pgmPoolMonitorModifiedInsert(pPool, pPage);
940
941 /*
942 * Execute REP STOSD.
943 *
944 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
945 * write situation, meaning that it's safe to write here.
946 */
947 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
948 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
949 while (pRegFrame->rcx)
950 {
951#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
952 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
953 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
954 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
955#else
956 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
957#endif
958#ifdef IN_RC
959 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
960#else
961 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
962#endif
963 pu32 += uIncrement;
964 GCPhysFault += uIncrement;
965 pRegFrame->rdi += uIncrement;
966 pRegFrame->rcx--;
967 }
968 pRegFrame->rip += pDis->cbInstr;
969
970 LogFlow(("pgmPoolAccessPfHandlerSTOSD: returns\n"));
971 return VINF_SUCCESS;
972}
973
974
975/**
976 * Handles the simple write accesses.
977 *
978 * @returns VBox status code suitable for scheduling.
979 * @param pVM The cross context VM structure.
980 * @param pVCpu The cross context virtual CPU structure.
981 * @param pPool The pool.
982 * @param pPage The pool page (head).
983 * @param pDis The disassembly of the write instruction.
984 * @param pRegFrame The trap register frame.
985 * @param GCPhysFault The fault address as guest physical address.
986 * @param pvFault The fault address.
987 * @param pfReused Reused state (in/out)
988 */
989DECLINLINE(int) pgmPoolAccessPfHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
990 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
991{
992 Log3(("pgmPoolAccessPfHandlerSimple\n"));
993 NOREF(pVM);
994 NOREF(pfReused); /* initialized by caller */
995
996 /*
997 * Increment the modification counter and insert it into the list
998 * of modified pages the first time.
999 */
1000 if (!pPage->cModifications++)
1001 pgmPoolMonitorModifiedInsert(pPool, pPage);
1002
1003 /*
1004 * Clear all the pages. ASSUMES that pvFault is readable.
1005 */
1006#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1007 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1008#endif
1009
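    /* Feed the write to the monitor chain in chunks of at most 8 bytes (one PAE-sized entry),
       so each call touches at most two adjacent shadow entries. */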
1010 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
1011 if (cbWrite <= 8)
1012 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1013 else if (cbWrite <= 16)
1014 {
1015 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1016 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1017 }
1018 else
1019 {
1020 Assert(cbWrite <= 32);
1021 for (uint32_t off = 0; off < cbWrite; off += 8)
1022 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1023 }
1024
1025#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1026 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1027#endif
1028
1029 /*
1030 * Interpret the instruction.
1031 */
1032 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
1033 if (RT_SUCCESS(rc))
1034 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1035 else if (rc == VERR_EM_INTERPRETER)
1036 {
1037 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1038 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1039 rc = VINF_EM_RAW_EMULATE_INSTR;
1040 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1041 }
1042
1043#if 0 /* experimental code */
1044 if (rc == VINF_SUCCESS)
1045 {
1046 switch (pPage->enmKind)
1047 {
1048 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1049 {
1050 X86PTEPAE GstPte;
1051 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1052 AssertRC(rc);
1053
1054 /* Check the new value written by the guest. If present and with a bogus physical address, then
1055 * it's fairly safe to assume the guest is reusing the PT.
1056 */
1057 if (GstPte.n.u1Present)
1058 {
1059 RTHCPHYS HCPhys = -1;
1060 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1061 if (rc != VINF_SUCCESS)
1062 {
1063 *pfReused = true;
1064 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1065 }
1066 }
1067 break;
1068 }
1069 }
1070 }
1071#endif
1072
1073 LogFlow(("pgmPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1074 return VBOXSTRICTRC_VAL(rc);
1075}
1076
1077
1078/**
1079 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1080 * \#PF access handler callback for page table pages.}
1081 *
1082 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
1083 */
1084DECLEXPORT(VBOXSTRICTRC) pgmPoolAccessPfHandler(PVM pVM, PVMCPU pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
1085 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1086{
1087 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1088 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1089 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1090 unsigned cMaxModifications;
1091 bool fForcedFlush = false;
1092 NOREF(uErrorCode);
1093
1094 LogFlow(("pgmPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1095
1096 pgmLock(pVM);
1097 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1098 {
1099 /* Pool page changed while we were waiting for the lock; ignore. */
1100 Log(("CPU%d: pgmPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1101 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1102 pgmUnlock(pVM);
1103 return VINF_SUCCESS;
1104 }
1105#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1106 if (pPage->fDirty)
1107 {
1108 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1109 pgmUnlock(pVM);
1110 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1111 }
1112#endif
1113
1114#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1115 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1116 {
1117 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1118 void *pvGst;
1119 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1120 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1121 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1122 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1123 }
1124#endif
1125
1126 /*
1127 * Disassemble the faulting instruction.
1128 */
1129 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1130 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1131 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1132 {
1133 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1134 pgmUnlock(pVM);
1135 return rc;
1136 }
1137
1138 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1139
1140 /*
1141 * We should ALWAYS have the list head as user parameter. This
1142 * is because we use that page to record the changes.
1143 */
1144 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1145
1146#ifdef IN_RING0
1147 /* Maximum nr of modifications depends on the page type. */
1148 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1149 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1150 cMaxModifications = 4;
1151 else
1152 cMaxModifications = 24;
1153#else
1154 cMaxModifications = 48;
1155#endif
1156
1157 /*
1158 * Incremental page table updates should weigh more than random ones.
1159 * (Only applies when started from offset 0)
1160 */
1161 pVCpu->pgm.s.cPoolAccessHandler++;
1162 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1163 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1164 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1165 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1166 {
1167 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1168 Assert(pPage->cModifications < 32000);
1169 pPage->cModifications = pPage->cModifications * 2;
1170 pPage->GCPtrLastAccessHandlerFault = pvFault;
1171 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1172 if (pPage->cModifications >= cMaxModifications)
1173 {
1174 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1175 fForcedFlush = true;
1176 }
1177 }
1178
1179 if (pPage->cModifications >= cMaxModifications)
1180 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1181
1182 /*
1183 * Check if it's worth dealing with.
1184 */
1185 bool fReused = false;
1186 bool fNotReusedNotForking = false;
1187 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1188 || pgmPoolIsPageLocked(pPage)
1189 )
1190 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1191 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1192 {
1193 /*
1194 * Simple instructions, no REP prefix.
1195 */
1196 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1197 {
1198 rc = pgmPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1199 if (fReused)
1200 goto flushPage;
1201
1202 /* A mov instruction to change the first page table entry will be remembered so we can detect
1203 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1204 */
1205 if ( rc == VINF_SUCCESS
1206 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1207 && pDis->pCurInstr->uOpcode == OP_MOV
1208 && (pvFault & PAGE_OFFSET_MASK) == 0)
1209 {
1210 pPage->GCPtrLastAccessHandlerFault = pvFault;
1211 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1212 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1213 /* Make sure we don't kick out a page too quickly. */
1214 if (pPage->cModifications > 8)
1215 pPage->cModifications = 2;
1216 }
1217 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1218 {
1219 /* ignore the 2nd write to this page table entry. */
1220 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1221 }
1222 else
1223 {
1224 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1225 pPage->GCPtrLastAccessHandlerRip = 0;
1226 }
1227
1228 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1229 pgmUnlock(pVM);
1230 return rc;
1231 }
1232
1233 /*
1234 * Windows is frequently doing small memset() operations (netio test 4k+).
1235 * We have to deal with these or we'll kill the cache and performance.
1236 */
1237 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1238 && !pRegFrame->eflags.Bits.u1DF
1239 && pDis->uOpMode == pDis->uCpuMode
1240 && pDis->uAddrMode == pDis->uCpuMode)
1241 {
1242 bool fValidStosd = false;
1243
1244 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1245 && pDis->fPrefix == DISPREFIX_REP
1246 && pRegFrame->ecx <= 0x20
1247 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1248 && !((uintptr_t)pvFault & 3)
1249 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1250 )
1251 {
1252 fValidStosd = true;
1253 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1254 }
1255 else
1256 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1257 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1258 && pRegFrame->rcx <= 0x20
1259 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1260 && !((uintptr_t)pvFault & 7)
1261 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1262 )
1263 {
1264 fValidStosd = true;
1265 }
1266
1267 if (fValidStosd)
1268 {
1269 rc = pgmPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1270 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1271 pgmUnlock(pVM);
1272 return rc;
1273 }
1274 }
1275
1276 /* REP prefix, don't bother. */
1277 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1278 Log4(("pgmPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1279 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1280 fNotReusedNotForking = true;
1281 }
1282
1283#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1284 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1285 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1286 */
1287 if ( pPage->cModifications >= cMaxModifications
1288 && !fForcedFlush
1289 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1290 && ( fNotReusedNotForking
1291 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1292 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1293 )
1294 )
1295 {
1296 Assert(!pgmPoolIsPageLocked(pPage));
1297 Assert(pPage->fDirty == false);
1298
1299 /* Flush any monitored duplicates as we will disable write protection. */
1300 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1301 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1302 {
1303 PPGMPOOLPAGE pPageHead = pPage;
1304
1305 /* Find the monitor head. */
1306 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1307 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1308
1309 while (pPageHead)
1310 {
1311 unsigned idxNext = pPageHead->iMonitoredNext;
1312
1313 if (pPageHead != pPage)
1314 {
1315 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1316 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1317 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1318 AssertRC(rc2);
1319 }
1320
1321 if (idxNext == NIL_PGMPOOL_IDX)
1322 break;
1323
1324 pPageHead = &pPool->aPages[idxNext];
1325 }
1326 }
1327
1328 /* The flushing above might fail for locked pages, so double check. */
1329 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1330 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1331 {
1332 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1333
1334 /* Temporarily allow write access to the page table again. */
1335 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1336 if (rc == VINF_SUCCESS)
1337 {
1338 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1339 AssertMsg(rc == VINF_SUCCESS
1340 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1341 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1342 || rc == VERR_PAGE_NOT_PRESENT,
1343 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1344# ifdef VBOX_STRICT
1345 pPage->GCPtrDirtyFault = pvFault;
1346# endif
1347
1348 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1349 pgmUnlock(pVM);
1350 return rc;
1351 }
1352 }
1353 }
1354#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1355
1356 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1357flushPage:
1358 /*
1359 * Not worth it, so flush it.
1360 *
1361 * If we considered it to be reused, don't go back to ring-3
1362 * to emulate failed instructions since we usually cannot
1363 * interpret them. This may be a bit risky, in which case
1364 * the reuse detection must be fixed.
1365 */
1366 rc = pgmPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1367 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1368 && fReused)
1369 {
1370 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1371 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1372 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1373 }
1374 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1375 pgmUnlock(pVM);
1376 return rc;
1377}
1378
1379# endif /* !IN_RING3 */
1380
1381/**
1382 * @callback_method_impl{FNPGMPHYSHANDLER,
1383 * Access handler for shadowed page table pages.}
1384 *
1385 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1386 */
1387PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1388pgmPoolAccessHandler(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1389 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1390{
1391 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1392 STAM_PROFILE_START(&pPool->StatMonitorR3, a);
1393 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1394 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1395 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1396
1397 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1398
1399 /*
1400 * Make sure the pool page wasn't modified by a different CPU.
1401 */
1402 pgmLock(pVM);
1403 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1404 {
1405 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1406
1407 /* The max modification count before flushing depends on the context and page type. */
1408#ifdef IN_RING3
1409 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1410#else
1411 uint16_t cMaxModifications;
1412 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1413 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1414 cMaxModifications = 4;
1415 else
1416 cMaxModifications = 24;
1417# ifdef IN_RC
1418 cMaxModifications *= 2; /* traps are cheaper than exits. */
1419# endif
1420#endif
1421
1422 /*
1423 * We don't have to be very sophisticated about this since there are relatively few calls here.
1424 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1425 */
1426 if ( ( pPage->cModifications < cMaxModifications
1427 || pgmPoolIsPageLocked(pPage) )
1428 && enmOrigin != PGMACCESSORIGIN_DEVICE
1429 && cbBuf <= 16)
1430 {
1431 /* Clear the shadow entry. */
1432 if (!pPage->cModifications++)
1433 pgmPoolMonitorModifiedInsert(pPool, pPage);
1434
1435 if (cbBuf <= 8)
1436 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1437 else
1438 {
1439 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1440 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1441 }
1442 }
1443 else
1444 {
1445 /* ASSUME that VERR_PGM_POOL_CLEARED can be ignored here and that FFs will deal with it in due time. */
1446 pgmPoolMonitorChainFlush(pPool, pPage);
1447 }
1448
1449 STAM_PROFILE_STOP_EX(&pPool->StatMonitorR3, &pPool->StatMonitorR3FlushPage, a);
1450 }
1451 else
1452 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1453 pgmUnlock(pVM);
1454 return VINF_PGM_HANDLER_DO_DEFAULT;
1455}
1456
1457
1458# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1459
1460# if defined(VBOX_STRICT) && !defined(IN_RING3)
1461
1462/**
1463 * Check references to guest physical memory in a PAE / PAE page table.
1464 *
1465 * @param pPool The pool.
1466 * @param pPage The page.
1467 * @param pShwPT The shadow page table (mapping of the page).
1468 * @param pGstPT The guest page table.
1469 */
1470static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1471{
1472 unsigned cErrors = 0;
1473 int LastRc = -1; /* initialized to shut up gcc */
1474 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1475 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1476 PVM pVM = pPool->CTX_SUFF(pVM);
1477
1478#ifdef VBOX_STRICT
1479 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1480 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1481#endif
1482 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1483 {
1484 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1485 {
1486 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1487 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1488 if ( rc != VINF_SUCCESS
1489 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1490 {
1491 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1492 LastPTE = i;
1493 LastRc = rc;
1494 LastHCPhys = HCPhys;
1495 cErrors++;
1496
1497 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1498 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1499 AssertRC(rc);
1500
1501 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1502 {
1503 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1504
1505 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1506 {
1507 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1508
1509 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1510 {
1511 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1512 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1513 {
1514 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1515 }
1516 }
1517
1518 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1519 }
1520 }
1521 }
1522 }
1523 }
1524 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1525}
1526
1527
1528/**
1529 * Check references to guest physical memory in a PAE / 32-bit page table.
1530 *
1531 * @param pPool The pool.
1532 * @param pPage The page.
1533 * @param pShwPT The shadow page table (mapping of the page).
1534 * @param pGstPT The guest page table.
1535 */
1536static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1537{
1538 unsigned cErrors = 0;
1539 int LastRc = -1; /* initialized to shut up gcc */
1540 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1541 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1542 PVM pVM = pPool->CTX_SUFF(pVM);
1543
1544#ifdef VBOX_STRICT
1545 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1546 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1547#endif
1548 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1549 {
1550 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1551 {
1552 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1553 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1554 if ( rc != VINF_SUCCESS
1555 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1556 {
1557 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1558 LastPTE = i;
1559 LastRc = rc;
1560 LastHCPhys = HCPhys;
1561 cErrors++;
1562
1563 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1564 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1565 AssertRC(rc);
1566
1567 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1568 {
1569 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1570
1571 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1572 {
1573 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1574
1575 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1576 {
1577 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1578 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1579 {
1580 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1581 }
1582 }
1583
1584 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1585 }
1586 }
1587 }
1588 }
1589 }
1590 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1591}
1592
1593# endif /* VBOX_STRICT && !IN_RING3 */
1594
1595/**
1596 * Clear references to guest physical memory in a PAE / PAE page table.
1597 *
1598 * @returns nr of changed PTEs
1599 * @param pPool The pool.
1600 * @param pPage The page.
1601 * @param pShwPT The shadow page table (mapping of the page).
1602 * @param pGstPT The guest page table.
1603 * @param pOldGstPT The old cached guest page table.
1604 * @param fAllowRemoval Whether to bail out early (requesting a flush via pfFlush) as soon as an invalid guest PTE is encountered.
1605 * @param pfFlush Flush reused page table (out)
1606 */
1607DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1608 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1609{
1610 unsigned cChanged = 0;
1611
1612#ifdef VBOX_STRICT
1613 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1614 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1615#endif
1616 *pfFlush = false;
1617
1618 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1619 {
1620 /* Check the new value written by the guest. If present and with a bogus physical address, then
1621 * it's fairly safe to assume the guest is reusing the PT.
1622 */
1623 if ( fAllowRemoval
1624 && pGstPT->a[i].n.u1Present)
1625 {
1626 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1627 {
1628 *pfFlush = true;
1629 return ++cChanged;
1630 }
1631 }
1632 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1633 {
1634 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1635 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1636 {
1637#ifdef VBOX_STRICT
1638 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1639 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1640 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1641#endif
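 /* Only compare the relevant attribute bits. The shadow PTE is allowed to be
    more restrictive than the guest one (fHostRW <= fGuestRW), e.g. when the
    shadow copy is still write protected. */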
1642 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1643 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1644 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1645 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1646
1647 if ( uHostAttr == uGuestAttr
1648 && fHostRW <= fGuestRW)
1649 continue;
1650 }
1651 cChanged++;
1652 /* Something was changed, so flush it. */
1653 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1654 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1655 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1656 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1657 }
1658 }
1659 return cChanged;
1660}
1661
1662
1663/**
1664 * Clear references to guest physical memory in a PAE / 32-bit page table.
1665 *
1666 * @returns nr of changed PTEs
1667 * @param pPool The pool.
1668 * @param pPage The page.
1669 * @param pShwPT The shadow page table (mapping of the page).
1670 * @param pGstPT The guest page table.
1671 * @param pOldGstPT The old cached guest page table.
1672 * @param fAllowRemoval Whether to bail out early (requesting a flush via pfFlush) as soon as an invalid guest PTE is encountered.
1673 * @param pfFlush Flush reused page table (out)
1674 */
1675DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1676 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1677{
1678 unsigned cChanged = 0;
1679
1680#ifdef VBOX_STRICT
1681 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1682 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1683#endif
1684 *pfFlush = false;
1685
1686 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1687 {
1688 /* Check the new value written by the guest. If present and with a bogus physical address, then
1689 * it's fairly safe to assume the guest is reusing the PT.
1690 */
1691 if ( fAllowRemoval
1692 && pGstPT->a[i].n.u1Present)
1693 {
1694 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1695 {
1696 *pfFlush = true;
1697 return ++cChanged;
1698 }
1699 }
1700 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1701 {
1702 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1703 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1704 {
1705#ifdef VBOX_STRICT
1706 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1707 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1708 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1709#endif
1710 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1711 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1712 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1713 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1714
1715 if ( uHostAttr == uGuestAttr
1716 && fHostRW <= fGuestRW)
1717 continue;
1718 }
1719 cChanged++;
1720 /* Something was changed, so flush it. */
1721 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1722 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1723 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1724 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1725 }
1726 }
1727 return cChanged;
1728}
1729
1730
1731/**
1732 * Flush a dirty page
1733 *
1734 * @param pVM The cross context VM structure.
1735 * @param pPool The pool.
1736 * @param idxSlot Dirty array slot index
1737 * @param fAllowRemoval Allow a reused page table to be removed
1738 */
1739static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1740{
1741 PPGMPOOLPAGE pPage;
1742 unsigned idxPage;
1743
1744 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1745 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1746 return;
1747
1748 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1749 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1750 pPage = &pPool->aPages[idxPage];
1751 Assert(pPage->idx == idxPage);
1752 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1753
1754 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1755 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1756
1757#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1758 PVMCPU pVCpu = VMMGetCpu(pVM);
1759 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1760#endif
1761
1762 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1763 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1764 Assert(rc == VINF_SUCCESS);
1765 pPage->fDirty = false;
1766
1767#ifdef VBOX_STRICT
1768 uint64_t fFlags = 0;
1769 RTHCPHYS HCPhys;
1770 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1771 AssertMsg( ( rc == VINF_SUCCESS
1772 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1773 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1774 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1775 || rc == VERR_PAGE_NOT_PRESENT,
1776 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1777#endif
1778
1779 /* Flush those PTEs that have changed. */
1780 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1781 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1782 void *pvGst;
1783 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1784 bool fFlush;
1785 unsigned cChanges;
1786
1787 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1788 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1789 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1790 else
1791 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1792 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1793
1794 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1795 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1796 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1797 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1798
1799 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1800 Assert(pPage->cModifications);
1801 if (cChanges < 4)
1802 pPage->cModifications = 1; /* must use > 0 here */
1803 else
1804 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1805
1806 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1807 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1808 pPool->idxFreeDirtyPage = idxSlot;
1809
1810 pPool->cDirtyPages--;
1811 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1812 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1813 if (fFlush)
1814 {
1815 Assert(fAllowRemoval);
1816 Log(("Flush reused page table!\n"));
1817 pgmPoolFlushPage(pPool, pPage);
1818 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1819 }
1820 else
1821 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1822
1823#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1824 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1825#endif
1826}
1827
1828
1829# ifndef IN_RING3
1830/**
1831 * Add a new dirty page
1832 *
1833 * @param pVM The cross context VM structure.
1834 * @param pPool The pool.
1835 * @param pPage The page.
1836 */
1837void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1838{
1839 unsigned idxFree;
1840
1841 PGM_LOCK_ASSERT_OWNER(pVM);
1842 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1843 Assert(!pPage->fDirty);
1844
1845 idxFree = pPool->idxFreeDirtyPage;
1846 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1847 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1848
1849 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1850 {
1851 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1852 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1853 }
1854 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1855 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1856
1857 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1858
1859 /*
1860 * Make a copy of the guest page table as we require valid GCPhys addresses
1861 * when removing references to physical pages.
1862 * (The HCPhys linear lookup is *extremely* expensive!)
1863 */
1864 void *pvGst;
1865 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
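 /* A shadow PAE PT covers 2 MB, i.e. only half of the 4 MB mapped by a 32-bit
    guest PT, so in the 32-bit case only half a page of guest PTEs is copied. */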
1866 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1867# ifdef VBOX_STRICT
1868 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1869 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1870 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1871 else
1872 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1873 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1874# endif
1875 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1876
1877 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1878 pPage->fDirty = true;
1879 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1880 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1881 pPool->cDirtyPages++;
1882
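 /* Advance the free index (the array size is a power of two, so the mask wraps it).
    If that slot is still occupied, search the remaining slots for a free one. */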
1883 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1884 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1885 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1886 {
1887 unsigned i;
1888 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1889 {
1890 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1891 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1892 {
1893 pPool->idxFreeDirtyPage = idxFree;
1894 break;
1895 }
1896 }
1897 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1898 }
1899
1900 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1901
1902 /*
1903 * Clear all references to this shadow table. See @bugref{7298}.
1904 */
1905 pgmPoolTrackClearPageUsers(pPool, pPage);
1906}
1907# endif /* !IN_RING3 */
1908
1909
1910/**
1911 * Check if the specified page is dirty (not write monitored)
1912 *
1913 * @return dirty or not
1914 * @param pVM The cross context VM structure.
1915 * @param GCPhys Guest physical address
1916 */
1917bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1918{
1919 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1920 PGM_LOCK_ASSERT_OWNER(pVM);
1921 if (!pPool->cDirtyPages)
1922 return false;
1923
1924 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1925
1926 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1927 {
1928 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1929 {
1930 PPGMPOOLPAGE pPage;
1931 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1932
1933 pPage = &pPool->aPages[idxPage];
1934 if (pPage->GCPhys == GCPhys)
1935 return true;
1936 }
1937 }
1938 return false;
1939}
1940
1941
1942/**
1943 * Reset all dirty pages by reinstating page monitoring.
1944 *
1945 * @param pVM The cross context VM structure.
1946 */
1947void pgmPoolResetDirtyPages(PVM pVM)
1948{
1949 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1950 PGM_LOCK_ASSERT_OWNER(pVM);
1951 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1952
1953 if (!pPool->cDirtyPages)
1954 return;
1955
1956 Log(("pgmPoolResetDirtyPages\n"));
1957 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1958 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1959
1960 pPool->idxFreeDirtyPage = 0;
1961 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1962 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1963 {
1964 unsigned i;
1965 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1966 {
1967 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1968 {
1969 pPool->idxFreeDirtyPage = i;
1970 break;
1971 }
1972 }
1973 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1974 }
1975
1976 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1977 return;
1978}
1979
1980
1981/**
1982 * Invalidate the PT entry for the specified page
1983 *
1984 * @param pVM The cross context VM structure.
1985 * @param GCPtrPage Guest page to invalidate
1986 */
1987void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1988{
1989 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1990 PGM_LOCK_ASSERT_OWNER(pVM);
1991 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1992
1993 if (!pPool->cDirtyPages)
1994 return;
1995
1996 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
1997 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1998 {
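 /* Note: currently nothing is done per dirty-page entry here. */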
1999 }
2000}
2001
2002
2003/**
2004 * Flush the dirty page corresponding to the specified page table, if any, reinstating its write monitoring.
2005 *
2006 * @param pVM The cross context VM structure.
2007 * @param GCPhysPT Physical address of the page table
2008 */
2009void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
2010{
2011 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2012 PGM_LOCK_ASSERT_OWNER(pVM);
2013 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2014 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2015
2016 if (!pPool->cDirtyPages)
2017 return;
2018
2019 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2020
2021 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2022 {
2023 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
2024 {
2025 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
2026
2027 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2028 if (pPage->GCPhys == GCPhysPT)
2029 {
2030 idxDirtyPage = i;
2031 break;
2032 }
2033 }
2034 }
2035
2036 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2037 {
2038 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2039 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2040 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
2041 {
2042 unsigned i;
2043 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2044 {
2045 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2046 {
2047 pPool->idxFreeDirtyPage = i;
2048 break;
2049 }
2050 }
2051 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2052 }
2053 }
2054}
2055
2056# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2057
2058/**
2059 * Inserts a page into the GCPhys hash table.
2060 *
2061 * @param pPool The pool.
2062 * @param pPage The page.
2063 */
2064DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2065{
2066 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2067 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2068 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2069 pPage->iNext = pPool->aiHash[iHash];
2070 pPool->aiHash[iHash] = pPage->idx;
2071}
2072
2073
2074/**
2075 * Removes a page from the GCPhys hash table.
2076 *
2077 * @param pPool The pool.
2078 * @param pPage The page.
2079 */
2080DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2081{
2082 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2083 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2084 if (pPool->aiHash[iHash] == pPage->idx)
2085 pPool->aiHash[iHash] = pPage->iNext;
2086 else
2087 {
2088 uint16_t iPrev = pPool->aiHash[iHash];
2089 for (;;)
2090 {
2091 const int16_t i = pPool->aPages[iPrev].iNext;
2092 if (i == pPage->idx)
2093 {
2094 pPool->aPages[iPrev].iNext = pPage->iNext;
2095 break;
2096 }
2097 if (i == NIL_PGMPOOL_IDX)
2098 {
2099 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2100 break;
2101 }
2102 iPrev = i;
2103 }
2104 }
2105 pPage->iNext = NIL_PGMPOOL_IDX;
2106}
2107
2108
2109/**
2110 * Frees up one cache page.
2111 *
2112 * @returns VBox status code.
2113 * @retval VINF_SUCCESS on success.
2114 * @param pPool The pool.
2115 * @param iUser The user index.
2116 */
2117static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2118{
2119#ifndef IN_RC
2120 const PVM pVM = pPool->CTX_SUFF(pVM);
2121#endif
2122 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
2123 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2124
2125 /*
2126 * Select one page from the tail of the age list.
2127 */
2128 PPGMPOOLPAGE pPage;
2129 for (unsigned iLoop = 0; ; iLoop++)
2130 {
2131 uint16_t iToFree = pPool->iAgeTail;
2132 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2133 iToFree = pPool->aPages[iToFree].iAgePrev;
2134/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2135 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2136 {
2137 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2138 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2139 {
2140 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2141 continue;
2142 iToFree = i;
2143 break;
2144 }
2145 }
2146*/
2147 Assert(iToFree != iUser);
2148 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2149 pPage = &pPool->aPages[iToFree];
2150
2151 /*
2152 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2153 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2154 */
2155 if ( !pgmPoolIsPageLocked(pPage)
2156 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2157 break;
2158 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2159 pgmPoolCacheUsed(pPool, pPage);
2160 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2161 }
2162
2163 /*
2164 * Found a usable page, flush it and return.
2165 */
2166 int rc = pgmPoolFlushPage(pPool, pPage);
2167 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2168 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2169 if (rc == VINF_SUCCESS)
2170 PGM_INVL_ALL_VCPU_TLBS(pVM);
2171 return rc;
2172}
2173
2174
2175/**
2176 * Checks if a kind mismatch is really a page being reused
2177 * or if it's just normal remappings.
2178 *
2179 * @returns true if reused and the cached page (enmKind1) should be flushed
2180 * @returns false if not reused.
2181 * @param enmKind1 The kind of the cached page.
2182 * @param enmKind2 The kind of the requested page.
2183 */
2184static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2185{
2186 switch (enmKind1)
2187 {
2188 /*
2189 * Never reuse them. There is no remapping in non-paging mode.
2190 */
2191 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2192 case PGMPOOLKIND_32BIT_PD_PHYS:
2193 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2194 case PGMPOOLKIND_PAE_PD_PHYS:
2195 case PGMPOOLKIND_PAE_PDPT_PHYS:
2196 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2197 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2198 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2199 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2200 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2201 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2202 return false;
2203
2204 /*
2205 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2206 */
2207 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2208 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2209 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2210 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2211 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2212 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2213 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2214 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2215 case PGMPOOLKIND_32BIT_PD:
2216 case PGMPOOLKIND_PAE_PDPT:
2217 switch (enmKind2)
2218 {
2219 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2220 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2221 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2222 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2223 case PGMPOOLKIND_64BIT_PML4:
2224 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2225 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2226 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2227 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2228 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2229 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2230 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2231 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2232 return true;
2233 default:
2234 return false;
2235 }
2236
2237 /*
2238 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2239 */
2240 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2241 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2242 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2243 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2244 case PGMPOOLKIND_64BIT_PML4:
2245 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2246 switch (enmKind2)
2247 {
2248 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2249 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2250 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2251 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2252 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2253 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2254 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2255 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2256 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2257 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2258 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2259 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2260 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2261 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2262 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2263 return true;
2264 default:
2265 return false;
2266 }
2267
2268 /*
2269 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2270 */
2271 case PGMPOOLKIND_ROOT_NESTED:
2272 return false;
2273
2274 default:
2275 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2276 }
2277}
2278
2279
2280/**
2281 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2282 *
2283 * @returns VBox status code.
2284 * @retval VINF_PGM_CACHED_PAGE on success.
2285 * @retval VERR_FILE_NOT_FOUND if not found.
2286 * @param pPool The pool.
2287 * @param GCPhys The GC physical address of the page we're gonna shadow.
2288 * @param enmKind The kind of mapping.
2289 * @param enmAccess Access type for the mapping (only relevant for big pages)
2290 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2291 * @param iUser The shadow page pool index of the user table. This is
2292 * NIL_PGMPOOL_IDX for root pages.
2293 * @param iUserTable The index into the user table (shadowed). Ignored if
2294 * root page
2295 * @param ppPage Where to store the pointer to the page.
2296 */
2297static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2298 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2299{
2300 /*
2301 * Look up the GCPhys in the hash.
2302 */
2303 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2304 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2305 if (i != NIL_PGMPOOL_IDX)
2306 {
2307 do
2308 {
2309 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2310 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2311 if (pPage->GCPhys == GCPhys)
2312 {
2313 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2314 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2315 && pPage->fA20Enabled == fA20Enabled)
2316 {
2317 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2318 * doesn't flush it in case there are no more free use records.
2319 */
2320 pgmPoolCacheUsed(pPool, pPage);
2321
2322 int rc = VINF_SUCCESS;
2323 if (iUser != NIL_PGMPOOL_IDX)
2324 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2325 if (RT_SUCCESS(rc))
2326 {
2327 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2328 *ppPage = pPage;
2329 if (pPage->cModifications)
2330 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2331 STAM_COUNTER_INC(&pPool->StatCacheHits);
2332 return VINF_PGM_CACHED_PAGE;
2333 }
2334 return rc;
2335 }
2336
2337 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2338 {
2339 /*
2340 * The kind is different. In some cases we should now flush the page
2341 * as it has been reused, but in most cases this is normal remapping
2342 * of PDs as PT or big pages using the GCPhys field in a slightly
2343 * different way than the other kinds.
2344 */
2345 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2346 {
2347 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2348 pgmPoolFlushPage(pPool, pPage);
2349 break;
2350 }
2351 }
2352 }
2353
2354 /* next */
2355 i = pPage->iNext;
2356 } while (i != NIL_PGMPOOL_IDX);
2357 }
2358
2359 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2360 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2361 return VERR_FILE_NOT_FOUND;
2362}
2363
2364
2365/**
2366 * Inserts a page into the cache.
2367 *
2368 * @param pPool The pool.
2369 * @param pPage The cached page.
2370 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2371 */
2372static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2373{
2374 /*
2375 * Insert into the GCPhys hash if the page is fit for that.
2376 */
2377 Assert(!pPage->fCached);
2378 if (fCanBeCached)
2379 {
2380 pPage->fCached = true;
2381 pgmPoolHashInsert(pPool, pPage);
2382 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2383 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2384 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2385 }
2386 else
2387 {
2388 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2389 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2390 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2391 }
2392
2393 /*
2394 * Insert at the head of the age list.
2395 */
2396 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2397 pPage->iAgeNext = pPool->iAgeHead;
2398 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2399 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2400 else
2401 pPool->iAgeTail = pPage->idx;
2402 pPool->iAgeHead = pPage->idx;
2403}
2404
2405
2406/**
2407 * Flushes a cached page.
2408 *
2409 * @param pPool The pool.
2410 * @param pPage The cached page.
2411 */
2412static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2413{
2414 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2415
2416 /*
2417 * Remove the page from the hash.
2418 */
2419 if (pPage->fCached)
2420 {
2421 pPage->fCached = false;
2422 pgmPoolHashRemove(pPool, pPage);
2423 }
2424 else
2425 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2426
2427 /*
2428 * Remove it from the age list.
2429 */
2430 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2431 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2432 else
2433 pPool->iAgeTail = pPage->iAgePrev;
2434 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2435 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2436 else
2437 pPool->iAgeHead = pPage->iAgeNext;
2438 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2439 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2440}
2441
2442
2443/**
2444 * Looks for pages sharing the monitor.
2445 *
2446 * @returns Pointer to the head page.
2447 * @returns NULL if not found.
2448 * @param pPool The pool.
2449 * @param pNewPage The page which is going to be monitored.
2450 */
2451static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2452{
2453 /*
2454 * Look up the GCPhys in the hash.
2455 */
2456 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2457 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2458 if (i == NIL_PGMPOOL_IDX)
2459 return NULL;
2460 do
2461 {
2462 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2463 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2464 && pPage != pNewPage)
2465 {
2466 switch (pPage->enmKind)
2467 {
2468 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2469 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2470 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2471 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2472 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2473 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2474 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2475 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2476 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2477 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2478 case PGMPOOLKIND_64BIT_PML4:
2479 case PGMPOOLKIND_32BIT_PD:
2480 case PGMPOOLKIND_PAE_PDPT:
2481 {
2482 /* find the head */
2483 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2484 {
2485 Assert(pPage->iMonitoredPrev != pPage->idx);
2486 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2487 }
2488 return pPage;
2489 }
2490
2491 /* ignore, no monitoring. */
2492 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2493 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2494 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2495 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2496 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2497 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2498 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2499 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2500 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2501 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2502 case PGMPOOLKIND_ROOT_NESTED:
2503 case PGMPOOLKIND_PAE_PD_PHYS:
2504 case PGMPOOLKIND_PAE_PDPT_PHYS:
2505 case PGMPOOLKIND_32BIT_PD_PHYS:
2506 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2507 break;
2508 default:
2509 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2510 }
2511 }
2512
2513 /* next */
2514 i = pPage->iNext;
2515 } while (i != NIL_PGMPOOL_IDX);
2516 return NULL;
2517}
2518
2519
2520/**
2521 * Enables write monitoring of a guest page.
2522 *
2523 * @returns VBox status code.
2524 * @retval VINF_SUCCESS on success.
2525 * @param pPool The pool.
2526 * @param pPage The cached page.
2527 */
2528static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2529{
2530 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2531
2532 /*
2533 * Filter out the relevant kinds.
2534 */
2535 switch (pPage->enmKind)
2536 {
2537 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2538 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2539 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2540 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2541 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2542 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2543 case PGMPOOLKIND_64BIT_PML4:
2544 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2545 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2546 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2547 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2548 case PGMPOOLKIND_32BIT_PD:
2549 case PGMPOOLKIND_PAE_PDPT:
2550 break;
2551
2552 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2553 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2554 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2555 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2556 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2557 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2558 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2559 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2560 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2561 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2562 case PGMPOOLKIND_ROOT_NESTED:
2563 /* Nothing to monitor here. */
2564 return VINF_SUCCESS;
2565
2566 case PGMPOOLKIND_32BIT_PD_PHYS:
2567 case PGMPOOLKIND_PAE_PDPT_PHYS:
2568 case PGMPOOLKIND_PAE_PD_PHYS:
2569 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2570 /* Nothing to monitor here. */
2571 return VINF_SUCCESS;
2572 default:
2573 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2574 }
2575
2576 /*
2577 * Install handler.
2578 */
2579 int rc;
2580 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2581 if (pPageHead)
2582 {
2583 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2584 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2585
2586#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2587 if (pPageHead->fDirty)
2588 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2589#endif
2590
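 /* Link this page into the chain right after the head; all pages in the chain
    share the physical access handler that was registered for the head page. */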
2591 pPage->iMonitoredPrev = pPageHead->idx;
2592 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2593 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2594 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2595 pPageHead->iMonitoredNext = pPage->idx;
2596 rc = VINF_SUCCESS;
2597 }
2598 else
2599 {
2600 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2601 PVM pVM = pPool->CTX_SUFF(pVM);
2602 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2603 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2604 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2605 NIL_RTR3PTR /*pszDesc*/);
2606 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2607 * the heap size should suffice. */
2608 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2609 PVMCPU pVCpu = VMMGetCpu(pVM);
2610 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2611 }
2612 pPage->fMonitored = true;
2613 return rc;
2614}
2615
2616
2617/**
2618 * Disables write monitoring of a guest page.
2619 *
2620 * @returns VBox status code.
2621 * @retval VINF_SUCCESS on success.
2622 * @param pPool The pool.
2623 * @param pPage The cached page.
2624 */
2625static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2626{
2627 /*
2628 * Filter out the relevant kinds.
2629 */
2630 switch (pPage->enmKind)
2631 {
2632 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2633 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2634 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2635 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2636 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2637 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2638 case PGMPOOLKIND_64BIT_PML4:
2639 case PGMPOOLKIND_32BIT_PD:
2640 case PGMPOOLKIND_PAE_PDPT:
2641 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2642 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2643 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2644 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2645 break;
2646
2647 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2648 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2649 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2650 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2651 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2652 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2653 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2654 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2655 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2656 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2657 case PGMPOOLKIND_ROOT_NESTED:
2658 case PGMPOOLKIND_PAE_PD_PHYS:
2659 case PGMPOOLKIND_PAE_PDPT_PHYS:
2660 case PGMPOOLKIND_32BIT_PD_PHYS:
2661 /* Nothing to monitor here. */
2662 Assert(!pPage->fMonitored);
2663 return VINF_SUCCESS;
2664
2665 default:
2666 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2667 }
2668 Assert(pPage->fMonitored);
2669
2670 /*
2671 * Remove the page from the monitored list or uninstall it if last.
2672 */
2673 const PVM pVM = pPool->CTX_SUFF(pVM);
2674 int rc;
2675 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2676 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2677 {
2678 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2679 {
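 /* This page is the head of the chain; promote the next page to head and
    re-point the registered handler's user arguments at it instead of
    deregistering and re-registering the handler. */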
2680 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2681 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2682 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, MMHyperCCToR3(pVM, pNewHead),
2683 MMHyperCCToR0(pVM, pNewHead), MMHyperCCToRC(pVM, pNewHead));
2684
2685 AssertFatalRCSuccess(rc);
2686 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2687 }
2688 else
2689 {
2690 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2691 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2692 {
2693 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2694 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2695 }
2696 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2697 rc = VINF_SUCCESS;
2698 }
2699 }
2700 else
2701 {
2702 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2703 AssertFatalRC(rc);
2704 PVMCPU pVCpu = VMMGetCpu(pVM);
2705 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2706 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2707 }
2708 pPage->fMonitored = false;
2709
2710 /*
2711 * Remove it from the list of modified pages (if in it).
2712 */
2713 pgmPoolMonitorModifiedRemove(pPool, pPage);
2714
2715 return rc;
2716}
2717
2718
2719/**
2720 * Inserts the page into the list of modified pages.
2721 *
2722 * @param pPool The pool.
2723 * @param pPage The page.
2724 */
2725void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2726{
2727 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2728 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2729 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2730 && pPool->iModifiedHead != pPage->idx,
2731 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2732 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2733 pPool->iModifiedHead, pPool->cModifiedPages));
2734
2735 pPage->iModifiedNext = pPool->iModifiedHead;
2736 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2737 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2738 pPool->iModifiedHead = pPage->idx;
2739 pPool->cModifiedPages++;
2740#ifdef VBOX_WITH_STATISTICS
2741 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2742 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2743#endif
2744}
2745
2746
2747/**
2748 * Removes the page from the list of modified pages and resets the
2749 * modification counter.
2750 *
2751 * @param pPool The pool.
2752 * @param pPage The page which is believed to be in the list of modified pages.
2753 */
2754static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2755{
2756 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2757 if (pPool->iModifiedHead == pPage->idx)
2758 {
2759 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2760 pPool->iModifiedHead = pPage->iModifiedNext;
2761 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2762 {
2763 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2764 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2765 }
2766 pPool->cModifiedPages--;
2767 }
2768 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2769 {
2770 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2771 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2772 {
2773 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2774 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2775 }
2776 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2777 pPool->cModifiedPages--;
2778 }
2779 else
2780 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2781 pPage->cModifications = 0;
2782}
2783
2784
2785/**
2786 * Zaps the list of modified pages, resetting their modification counters in the process.
2787 *
2788 * @param pVM The cross context VM structure.
2789 */
2790static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2791{
2792 pgmLock(pVM);
2793 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2794 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2795
2796 unsigned cPages = 0; NOREF(cPages);
2797
2798#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2799 pgmPoolResetDirtyPages(pVM);
2800#endif
2801
2802 uint16_t idx = pPool->iModifiedHead;
2803 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2804 while (idx != NIL_PGMPOOL_IDX)
2805 {
2806 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2807 idx = pPage->iModifiedNext;
2808 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2809 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2810 pPage->cModifications = 0;
2811 Assert(++cPages);
2812 }
2813 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2814 pPool->cModifiedPages = 0;
2815 pgmUnlock(pVM);
2816}
2817
2818
2819/**
2820 * Handle SyncCR3 pool tasks
2821 *
2822 * @returns VBox status code.
2823 * @retval VINF_SUCCESS on success.
2824 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2825 * @param pVCpu The cross context virtual CPU structure.
2826 * @remark Should only be used when monitoring is available, thus placed in
2827 * the PGMPOOL_WITH_MONITORING \#ifdef.
2828 */
2829int pgmPoolSyncCR3(PVMCPU pVCpu)
2830{
2831 PVM pVM = pVCpu->CTX_SUFF(pVM);
2832 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2833
2834 /*
2835 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2836 * Occasionally we will have to clear all the shadow page tables because we wanted
2837 * to monitor a page which was mapped by too many shadowed page tables. This operation
2838 * sometimes referred to as a 'lightweight flush'.
2839 */
2840# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2841 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2842 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2843# else /* !IN_RING3 */
2844 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2845 {
2846 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2847 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2848
2849 /* Make sure all other VCPUs return to ring 3. */
2850 if (pVM->cCpus > 1)
2851 {
2852 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2853 PGM_INVL_ALL_VCPU_TLBS(pVM);
2854 }
2855 return VINF_PGM_SYNC_CR3;
2856 }
2857# endif /* !IN_RING3 */
2858 else
2859 {
2860 pgmPoolMonitorModifiedClearAll(pVM);
2861
2862 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2863 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2864 {
2865 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2866 return pgmPoolSyncCR3(pVCpu);
2867 }
2868 }
2869 return VINF_SUCCESS;
2870}
2871
2872
2873/**
2874 * Frees up at least one user entry.
2875 *
2876 * @returns VBox status code.
2877 * @retval VINF_SUCCESS if successfully freed.
2878 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2879 * @param pPool The pool.
2880 * @param iUser The user index.
2881 */
2882static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2883{
2884 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2885 /*
2886 * Just free cached pages in a braindead fashion.
2887 */
2888 /** @todo walk the age list backwards and free the first with usage. */
2889 int rc = VINF_SUCCESS;
2890 do
2891 {
2892 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2893 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2894 rc = rc2;
2895 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2896 return rc;
2897}
2898
2899
2900/**
2901 * Inserts a page into the cache.
2902 *
2903 * This will create user node for the page, insert it into the GCPhys
2904 * hash, and insert it into the age list.
2905 *
2906 * @returns VBox status code.
2907 * @retval VINF_SUCCESS if successfully added.
2908 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2909 * @param pPool The pool.
2910 * @param pPage The cached page.
2911 * @param GCPhys The GC physical address of the page we're gonna shadow.
2912 * @param iUser The user index.
2913 * @param iUserTable The user table index.
2914 */
2915DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2916{
2917 int rc = VINF_SUCCESS;
2918 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2919
2920 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2921
2922 if (iUser != NIL_PGMPOOL_IDX)
2923 {
2924#ifdef VBOX_STRICT
2925 /*
2926 * Check that the entry doesn't already exist.
2927 */
2928 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2929 {
2930 uint16_t i = pPage->iUserHead;
2931 do
2932 {
2933 Assert(i < pPool->cMaxUsers);
2934 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2935 i = paUsers[i].iNext;
2936 } while (i != NIL_PGMPOOL_USER_INDEX);
2937 }
2938#endif
2939
2940 /*
2941 * Find a free user node.
2942 */
2943 uint16_t i = pPool->iUserFreeHead;
2944 if (i == NIL_PGMPOOL_USER_INDEX)
2945 {
2946 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2947 if (RT_FAILURE(rc))
2948 return rc;
2949 i = pPool->iUserFreeHead;
2950 }
2951
2952 /*
2953 * Unlink the user node from the free list,
2954 * initialize and insert it into the user list.
2955 */
2956 pPool->iUserFreeHead = paUsers[i].iNext;
2957 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2958 paUsers[i].iUser = iUser;
2959 paUsers[i].iUserTable = iUserTable;
2960 pPage->iUserHead = i;
2961 }
2962 else
2963 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2964
2965
2966 /*
2967 * Insert into cache and enable monitoring of the guest page if enabled.
2968 *
2969 * Until we implement caching of all levels, including the CR3 one, we'll
2970 * have to make sure we don't try monitor & cache any recursive reuse of
2971 * a monitored CR3 page. Because all Windows versions are doing this we'll
2972 * have to be able to do combined access monitoring, CR3 + PT and
2973 * PD + PT (guest PAE).
2974 *
2975 * Update:
2976 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2977 */
2978 const bool fCanBeMonitored = true;
2979 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2980 if (fCanBeMonitored)
2981 {
2982 rc = pgmPoolMonitorInsert(pPool, pPage);
2983 AssertRC(rc);
2984 }
2985 return rc;
2986}
2987
2988
2989/**
2990 * Adds a user reference to a page.
2991 *
2992 * This will move the page to the head of the age list.
2993 *
2994 * @returns VBox status code.
2995 * @retval VINF_SUCCESS if successfully added.
2996 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2997 * @param pPool The pool.
2998 * @param pPage The cached page.
2999 * @param iUser The user index.
3000 * @param iUserTable The user table.
3001 */
3002static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3003{
3004 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3005 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3006 Assert(iUser != NIL_PGMPOOL_IDX);
3007
3008# ifdef VBOX_STRICT
3009 /*
3010 * Check that the entry doesn't already exist. We only allow multiple
3011 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3012 */
3013 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3014 {
3015 uint16_t i = pPage->iUserHead;
3016 do
3017 {
3018 Assert(i < pPool->cMaxUsers);
3019 /** @todo this assertion looks odd... Shouldn't it be && here? */
3020 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3021 i = paUsers[i].iNext;
3022 } while (i != NIL_PGMPOOL_USER_INDEX);
3023 }
3024# endif
3025
3026 /*
3027 * Allocate a user node.
3028 */
3029 uint16_t i = pPool->iUserFreeHead;
3030 if (i == NIL_PGMPOOL_USER_INDEX)
3031 {
3032 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3033 if (RT_FAILURE(rc))
3034 return rc;
3035 i = pPool->iUserFreeHead;
3036 }
3037 pPool->iUserFreeHead = paUsers[i].iNext;
3038
3039 /*
3040 * Initialize the user node and insert it.
3041 */
3042 paUsers[i].iNext = pPage->iUserHead;
3043 paUsers[i].iUser = iUser;
3044 paUsers[i].iUserTable = iUserTable;
3045 pPage->iUserHead = i;
3046
3047# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
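 /* Adding a new user while the page sits in the dirty cache: flush it first so
    the page is write monitored again (false = do not remove the page itself). */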
3048 if (pPage->fDirty)
3049 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3050# endif
3051
3052 /*
3053 * Tell the cache to update its replacement stats for this page.
3054 */
3055 pgmPoolCacheUsed(pPool, pPage);
3056 return VINF_SUCCESS;
3057}
3058
3059
3060/**
3061 * Frees a user record associated with a page.
3062 *
3063 * This does not clear the entry in the user table, it simply returns the
3064 * user record to the chain of free records.
3065 *
3066 * @param pPool The pool.
3067 * @param pPage The shadow page.
3068 * @param iUser The shadow page pool index of the user table.
3069 * @param iUserTable The index into the user table (shadowed).
3070 *
3071 * @remarks Don't call this for root pages.
3072 */
3073static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3074{
3075 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3076 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3077 Assert(iUser != NIL_PGMPOOL_IDX);
3078
3079 /*
3080 * Unlink and free the specified user entry.
3081 */
3082
3083 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3084 uint16_t i = pPage->iUserHead;
3085 if ( i != NIL_PGMPOOL_USER_INDEX
3086 && paUsers[i].iUser == iUser
3087 && paUsers[i].iUserTable == iUserTable)
3088 {
3089 pPage->iUserHead = paUsers[i].iNext;
3090
3091 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3092 paUsers[i].iNext = pPool->iUserFreeHead;
3093 pPool->iUserFreeHead = i;
3094 return;
3095 }
3096
3097 /* General: Linear search. */
3098 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3099 while (i != NIL_PGMPOOL_USER_INDEX)
3100 {
3101 if ( paUsers[i].iUser == iUser
3102 && paUsers[i].iUserTable == iUserTable)
3103 {
3104 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3105 paUsers[iPrev].iNext = paUsers[i].iNext;
3106 else
3107 pPage->iUserHead = paUsers[i].iNext;
3108
3109 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3110 paUsers[i].iNext = pPool->iUserFreeHead;
3111 pPool->iUserFreeHead = i;
3112 return;
3113 }
3114 iPrev = i;
3115 i = paUsers[i].iNext;
3116 }
3117
3118 /* Fatal: didn't find it */
3119 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3120 iUser, iUserTable, pPage->GCPhys));
3121}
3122
3123
3124#if 0 /* unused */
3125/**
3126 * Gets the entry size of a shadow table.
3127 *
3128 * @param enmKind The kind of page.
3129 *
3130 * @returns The size of the entry in bytes. That is, 4 or 8.
3131 * @returns If the kind is not for a table, an assertion is raised and 0 is
3132 * returned.
3133 */
3134DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3135{
3136 switch (enmKind)
3137 {
3138 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3139 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3140 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3141 case PGMPOOLKIND_32BIT_PD:
3142 case PGMPOOLKIND_32BIT_PD_PHYS:
3143 return 4;
3144
3145 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3146 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3147 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3148 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3149 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3150 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3151 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3152 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3153 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3154 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3155 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3156 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3157 case PGMPOOLKIND_64BIT_PML4:
3158 case PGMPOOLKIND_PAE_PDPT:
3159 case PGMPOOLKIND_ROOT_NESTED:
3160 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3161 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3162 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3163 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3164 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3165 case PGMPOOLKIND_PAE_PD_PHYS:
3166 case PGMPOOLKIND_PAE_PDPT_PHYS:
3167 return 8;
3168
3169 default:
3170 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3171 }
3172}
3173#endif /* unused */
3174
3175#if 0 /* unused */
3176/**
3177 * Gets the entry size of a guest table.
3178 *
3179 * @param enmKind The kind of page.
3180 *
3181 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3182 * @returns If the kind is not for a table, an assertion is raised and 0 is
3183 * returned.
3184 */
3185DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3186{
3187 switch (enmKind)
3188 {
3189 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3190 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3191 case PGMPOOLKIND_32BIT_PD:
3192 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3193 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3194 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3195 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3196 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3197 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3198 return 4;
3199
3200 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3201 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3202 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3203 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3204 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3205 case PGMPOOLKIND_64BIT_PML4:
3206 case PGMPOOLKIND_PAE_PDPT:
3207 return 8;
3208
3209 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3210 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3211 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3212 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3213 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3214 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3215 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3216 case PGMPOOLKIND_ROOT_NESTED:
3217 case PGMPOOLKIND_PAE_PD_PHYS:
3218 case PGMPOOLKIND_PAE_PDPT_PHYS:
3219 case PGMPOOLKIND_32BIT_PD_PHYS:
3220 /** @todo can we return 0? (nobody is calling this...) */
3221 AssertFailed();
3222 return 0;
3223
3224 default:
3225 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3226 }
3227}
3228#endif /* unused */
3229
3230
3231/**
3232 * Checks one shadow page table entry for a mapping of a physical page.
3233 *
3234 * @returns true if the PTE was kept (only updated), false if it was removed.
3235 *
3236 * @param pVM The cross context VM structure.
3237 * @param pPhysPage The guest page in question.
3238 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3239 * @param iShw The shadow page table.
3240 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3241 */
3242static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3243{
3244 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3245 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3246 bool fRet = false;
3247
3248 /*
3249 * Assert sanity.
3250 */
3251 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3252 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3253 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3254
3255 /*
3256 * Then, clear the actual mappings to the page in the shadow PT.
3257 */
3258 switch (pPage->enmKind)
3259 {
3260 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3261 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3262 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3263 {
3264 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3265 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3266 uint32_t u32AndMask = 0;
3267 uint32_t u32OrMask = 0;
3268
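/* When fFlushPTEs is false, the masks are set up so the entry can be kept:
   either write access is restored (no active handler) or the RW bit is
   stripped (write handler). Leaving u32AndMask at zero means the entry is
   cleared below after all. */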
3269 if (!fFlushPTEs)
3270 {
3271 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3272 {
3273 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3274 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3275 u32OrMask = X86_PTE_RW;
3276 u32AndMask = UINT32_MAX;
3277 fRet = true;
3278 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3279 break;
3280
3281 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3282 u32OrMask = 0;
3283 u32AndMask = ~X86_PTE_RW;
3284 fRet = true;
3285 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3286 break;
3287 default:
3288 /* (shouldn't be here, will assert below) */
3289 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3290 break;
3291 }
3292 }
3293 else
3294 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3295
3296 /* Update the counter if we're removing references. */
3297 if (!u32AndMask)
3298 {
3299 Assert(pPage->cPresent);
3300 Assert(pPool->cPresent);
3301 pPage->cPresent--;
3302 pPool->cPresent--;
3303 }
3304
3305 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3306 {
3307 X86PTE Pte;
3308
3309 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte].u));
3310 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3311 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3312 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3313
3314 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3315 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3316 return fRet;
3317 }
3318#ifdef LOG_ENABLED
3319 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3320 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3321 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3322 {
3323 Log(("i=%d cFound=%d\n", i, ++cFound));
3324 }
3325#endif
3326 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3327 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3328 break;
3329 }
3330
3331 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3332 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3333 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3334 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3335 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3336 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3337 {
3338 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3339 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3340 uint64_t u64OrMask = 0;
3341 uint64_t u64AndMask = 0;
3342
3343 if (!fFlushPTEs)
3344 {
3345 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3346 {
3347 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3348 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3349 u64OrMask = X86_PTE_RW;
3350 u64AndMask = UINT64_MAX;
3351 fRet = true;
3352 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3353 break;
3354
3355 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3356 u64OrMask = 0;
3357 u64AndMask = ~(uint64_t)X86_PTE_RW;
3358 fRet = true;
3359 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3360 break;
3361
3362 default:
3363 /* (shouldn't be here, will assert below) */
3364 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3365 break;
3366 }
3367 }
3368 else
3369 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3370
3371 /* Update the counter if we're removing references. */
3372 if (!u64AndMask)
3373 {
3374 Assert(pPage->cPresent);
3375 Assert(pPool->cPresent);
3376 pPage->cPresent--;
3377 pPool->cPresent--;
3378 }
3379
3380 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3381 {
3382 X86PTEPAE Pte;
3383
3384 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3385 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3386 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3387 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3388
3389 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3390 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3391 return fRet;
3392 }
3393#ifdef LOG_ENABLED
3394 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3395 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3396 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3397 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3398 Log(("i=%d cFound=%d\n", i, ++cFound));
3399#endif
3400 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3401 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3402 break;
3403 }
3404
3405#ifdef PGM_WITH_LARGE_PAGES
3406 /* Large page case only. */
3407 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3408 {
3409 Assert(pVM->pgm.s.fNestedPaging);
3410
3411 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3412 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3413
3414 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3415 {
3416 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3417 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3418 pPD->a[iPte].u = 0;
3419 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3420
3421 /* Update the counter as we're removing references. */
3422 Assert(pPage->cPresent);
3423 Assert(pPool->cPresent);
3424 pPage->cPresent--;
3425 pPool->cPresent--;
3426
3427 return fRet;
3428 }
3429# ifdef LOG_ENABLED
3430 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3431 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3432 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3433 Log(("i=%d cFound=%d\n", i, ++cFound));
3434# endif
3435 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3436 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3437 break;
3438 }
3439
3440 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3441 case PGMPOOLKIND_PAE_PD_PHYS:
3442 {
3443 Assert(pVM->pgm.s.fNestedPaging);
3444
3445 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3446 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3447
3448 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3449 {
3450 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3451 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3452 pPD->a[iPte].u = 0;
3453 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3454
3455 /* Update the counter as we're removing references. */
3456 Assert(pPage->cPresent);
3457 Assert(pPool->cPresent);
3458 pPage->cPresent--;
3459 pPool->cPresent--;
3460 return fRet;
3461 }
3462# ifdef LOG_ENABLED
3463 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3464 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3465 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3466 Log(("i=%d cFound=%d\n", i, ++cFound));
3467# endif
3468 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3469 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3470 break;
3471 }
3472#endif /* PGM_WITH_LARGE_PAGES */
3473
3474 default:
3475 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3476 }
3477
3478 /* not reached. */
3479#ifndef _MSC_VER
3480 return fRet;
3481#endif
3482}
3483
3484
3485/**
3486 * Scans one shadow page table for mappings of a physical page.
3487 *
3488 * @param pVM The cross context VM structure.
3489 * @param pPhysPage The guest page in question.
3490 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3491 * @param iShw The shadow page table.
3492 */
3493static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3494{
3495 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3496
3497 /* We should only come here when there's only one reference to this physical page. */
3498 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3499
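/* With a single reference the PGMPAGE tracking data holds the shadow page
   table's pool index directly, and the PTE index is available via
   PGM_PAGE_GET_PTE_INDEX, so no searching is necessary. */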
3500 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3501 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3502 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3503 if (!fKeptPTEs)
3504 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3505 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3506}
3507
3508
3509/**
3510 * Flushes a list of shadow page tables mapping the same physical page.
3511 *
3512 * @param pVM The cross context VM structure.
3513 * @param pPhysPage The guest page in question.
3514 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3515 * @param iPhysExt The physical cross reference extent list to flush.
3516 */
3517static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3518{
3519 PGM_LOCK_ASSERT_OWNER(pVM);
3520 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3521 bool fKeepList = false;
3522
3523 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3524 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3525
3526 const uint16_t iPhysExtStart = iPhysExt;
3527 PPGMPOOLPHYSEXT pPhysExt;
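/* Walk the whole extent chain, flushing (or merely updating) each referenced
   shadow PTE; if any entry is kept the chain must stay allocated. */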
3528 do
3529 {
3530 Assert(iPhysExt < pPool->cMaxPhysExts);
3531 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3532 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3533 {
3534 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3535 {
3536 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3537 if (!fKeptPTEs)
3538 {
3539 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3540 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3541 }
3542 else
3543 fKeepList = true;
3544 }
3545 }
3546 /* next */
3547 iPhysExt = pPhysExt->iNext;
3548 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3549
3550 if (!fKeepList)
3551 {
3552 /* insert the list into the free list and clear the ram range entry. */
3553 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3554 pPool->iPhysExtFreeHead = iPhysExtStart;
3555 /* Invalidate the tracking data. */
3556 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3557 }
3558
3559 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3560}
3561
3562
3563/**
3564 * Flushes all shadow page table mappings of the given guest page.
3565 *
3566 * This is typically called when the host page backing the guest one has been
3567 * replaced or when the page protection was changed due to a guest access
3568 * caught by the monitoring.
3569 *
3570 * @returns VBox status code.
3571 * @retval VINF_SUCCESS if all references have been successfully cleared.
3572 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3573 * pool cleaning. FF and sync flags are set.
3574 *
3575 * @param pVM The cross context VM structure.
3576 * @param GCPhysPage GC physical address of the page in question
3577 * @param pPhysPage The guest page in question.
3578 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3579 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3580 * flushed; it is NOT touched if this isn't necessary.
3581 * The caller MUST initialize this to @a false.
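 *
 * A rough caller sketch for illustration (not an actual call site; the final
 * shadow TLB flush is only indicated by a placeholder comment):
 * @code
 *     bool fFlushTLBs = false;
 *     int rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysPage, pPhysPage, true, &fFlushTLBs);
 *     if (fFlushTLBs)
 *     {
 *         // flush the shadow TLBs of all VCPUs here
 *     }
 * @endcode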
3582 */
3583int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3584{
3585 PVMCPU pVCpu = VMMGetCpu(pVM);
3586 pgmLock(pVM);
3587 int rc = VINF_SUCCESS;
3588
3589#ifdef PGM_WITH_LARGE_PAGES
3590 /* Is this page part of a large page? */
3591 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3592 {
3593 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3594 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3595
3596 /* Fetch the large page base. */
3597 PPGMPAGE pLargePage;
3598 if (GCPhysBase != GCPhysPage)
3599 {
3600 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3601 AssertFatal(pLargePage);
3602 }
3603 else
3604 pLargePage = pPhysPage;
3605
3606 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3607
3608 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3609 {
3610 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3611 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3612 pVM->pgm.s.cLargePagesDisabled++;
3613
3614 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3615 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3616
3617 *pfFlushTLBs = true;
3618 pgmUnlock(pVM);
3619 return rc;
3620 }
3621 }
3622#else
3623 NOREF(GCPhysPage);
3624#endif /* PGM_WITH_LARGE_PAGES */
3625
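/* The 16-bit tracking word packs a reference count and an index: with a
   single reference the index is the shadow page table's pool index, while a
   count of PGMPOOL_TD_CREFS_PHYSEXT means the index refers to the head of a
   physical cross reference extent list (or is PGMPOOL_TD_IDX_OVERFLOWED,
   which forces the slow scan). */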
3626 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3627 if (u16)
3628 {
3629 /*
3630 * The zero page is currently screwing up the tracking and we'll
3631 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3632 * is defined, zero pages won't normally be mapped. Some kind of solution
3633 * will be needed for this problem of course, but it will have to wait...
3634 */
3635 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3636 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3637 rc = VINF_PGM_GCPHYS_ALIASED;
3638 else
3639 {
3640# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3641 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3642 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3643 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3644# endif
3645
3646 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3647 {
3648 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3649 pgmPoolTrackFlushGCPhysPT(pVM,
3650 pPhysPage,
3651 fFlushPTEs,
3652 PGMPOOL_TD_GET_IDX(u16));
3653 }
3654 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3655 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3656 else
3657 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3658 *pfFlushTLBs = true;
3659
3660# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3661 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3662# endif
3663 }
3664 }
3665
3666 if (rc == VINF_PGM_GCPHYS_ALIASED)
3667 {
3668 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3669 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3670 rc = VINF_PGM_SYNC_CR3;
3671 }
3672 pgmUnlock(pVM);
3673 return rc;
3674}
3675
3676
3677/**
3678 * Scans all shadow page tables for mappings of a physical page.
3679 *
3680 * This may be slow, but it's most likely more efficient than cleaning
3681 * out the entire page pool / cache.
3682 *
3683 * @returns VBox status code.
3684 * @retval VINF_SUCCESS if all references have been successfully cleared.
3685 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3686 * a page pool cleaning.
3687 *
3688 * @param pVM The cross context VM structure.
3689 * @param pPhysPage The guest page in question.
3690 */
3691int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3692{
3693 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3694 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3695 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3696 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3697
3698 /*
3699 * There is a limit to what makes sense.
3700 */
3701 if ( pPool->cPresent > 1024
3702 && pVM->cCpus == 1)
3703 {
3704 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3705 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3706 return VINF_PGM_GCPHYS_ALIASED;
3707 }
3708
3709 /*
3710 * Iterate all the pages until we've encountered all those in use.
3711 * This is a simple but not quite optimal solution.
3712 */
3713 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3714 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3715 unsigned cLeft = pPool->cUsedPages;
3716 unsigned iPage = pPool->cCurPages;
3717 while (--iPage >= PGMPOOL_IDX_FIRST)
3718 {
3719 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3720 if ( pPage->GCPhys != NIL_RTGCPHYS
3721 && pPage->cPresent)
3722 {
3723 switch (pPage->enmKind)
3724 {
3725 /*
3726 * We only care about shadow page tables.
3727 */
3728 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3729 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3730 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3731 {
3732 unsigned cPresent = pPage->cPresent;
3733 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3734 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3735 if (pPT->a[i].n.u1Present)
3736 {
3737 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3738 {
3739 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3740 pPT->a[i].u = 0;
3741
3742 /* Update the counter as we're removing references. */
3743 Assert(pPage->cPresent);
3744 Assert(pPool->cPresent);
3745 pPage->cPresent--;
3746 pPool->cPresent--;
3747 }
3748 if (!--cPresent)
3749 break;
3750 }
3751 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3752 break;
3753 }
3754
3755 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3756 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3757 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3758 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3759 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3760 {
3761 unsigned cPresent = pPage->cPresent;
3762 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3763 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3764 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3765 {
3766 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3767 {
3768 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3769 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3770
3771 /* Update the counter as we're removing references. */
3772 Assert(pPage->cPresent);
3773 Assert(pPool->cPresent);
3774 pPage->cPresent--;
3775 pPool->cPresent--;
3776 }
3777 if (!--cPresent)
3778 break;
3779 }
3780 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3781 break;
3782 }
3783#ifndef IN_RC
3784 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3785 {
3786 unsigned cPresent = pPage->cPresent;
3787 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3788 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3789 if (pPT->a[i].n.u1Present)
3790 {
3791 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3792 {
3793 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3794 pPT->a[i].u = 0;
3795
3796 /* Update the counter as we're removing references. */
3797 Assert(pPage->cPresent);
3798 Assert(pPool->cPresent);
3799 pPage->cPresent--;
3800 pPool->cPresent--;
3801 }
3802 if (!--cPresent)
3803 break;
3804 }
3805 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3806 break;
3807 }
3808#endif
3809 }
3810 if (!--cLeft)
3811 break;
3812 }
3813 }
3814
3815 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3816 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3817
3818 /*
3819 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3820 */
3821 if (pPool->cPresent > 1024)
3822 {
3823 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3824 return VINF_PGM_GCPHYS_ALIASED;
3825 }
3826
3827 return VINF_SUCCESS;
3828}
3829
3830
3831/**
3832 * Clears the user entry in a user table.
3833 *
3834 * This is used to remove all references to a page when flushing it.
3835 */
3836static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3837{
3838 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3839 Assert(pUser->iUser < pPool->cCurPages);
3840 uint32_t iUserTable = pUser->iUserTable;
3841
3842 /*
3843 * Map the user page. Ignore references made by fictitious pages.
3844 */
3845 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3846 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3847 union
3848 {
3849 uint64_t *pau64;
3850 uint32_t *pau32;
3851 } u;
3852 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3853 {
3854 Assert(!pUserPage->pvPageR3);
3855 return;
3856 }
3857 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3858
3859
3860 /* Safety precaution in case we change the paging for other modes too in the future. */
3861 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3862
3863#ifdef VBOX_STRICT
3864 /*
3865 * Some sanity checks.
3866 */
3867 switch (pUserPage->enmKind)
3868 {
3869 case PGMPOOLKIND_32BIT_PD:
3870 case PGMPOOLKIND_32BIT_PD_PHYS:
3871 Assert(iUserTable < X86_PG_ENTRIES);
3872 break;
3873 case PGMPOOLKIND_PAE_PDPT:
3874 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3875 case PGMPOOLKIND_PAE_PDPT_PHYS:
3876 Assert(iUserTable < 4);
3877 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3878 break;
3879 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3880 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3881 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3882 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3883 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3884 case PGMPOOLKIND_PAE_PD_PHYS:
3885 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3886 break;
3887 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3888 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3889 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3890 break;
3891 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3892 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3893 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3894 break;
3895 case PGMPOOLKIND_64BIT_PML4:
3896 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3897 /* GCPhys >> PAGE_SHIFT is the index here */
3898 break;
3899 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3900 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3901 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3902 break;
3903
3904 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3905 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3906 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3907 break;
3908
3909 case PGMPOOLKIND_ROOT_NESTED:
3910 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3911 break;
3912
3913 default:
3914 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3915 break;
3916 }
3917#endif /* VBOX_STRICT */
3918
3919 /*
3920 * Clear the entry in the user page.
3921 */
3922 switch (pUserPage->enmKind)
3923 {
3924 /* 32-bit entries */
3925 case PGMPOOLKIND_32BIT_PD:
3926 case PGMPOOLKIND_32BIT_PD_PHYS:
3927 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3928 break;
3929
3930 /* 64-bit entries */
3931 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3932 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3933 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3934 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3935 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3936#ifdef IN_RC
3937 /*
3938 * In 32-bit PAE mode we *must* invalidate the TLB when changing a
3939 * PDPT entry; the CPU fetches them only during cr3 load, so any
3940 * non-present PDPT will continue to cause page faults.
3941 */
3942 ASMReloadCR3();
3943 /* no break */
3944#endif
3945 case PGMPOOLKIND_PAE_PD_PHYS:
3946 case PGMPOOLKIND_PAE_PDPT_PHYS:
3947 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3948 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3949 case PGMPOOLKIND_64BIT_PML4:
3950 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3951 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3952 case PGMPOOLKIND_PAE_PDPT:
3953 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3954 case PGMPOOLKIND_ROOT_NESTED:
3955 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3956 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3957 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3958 break;
3959
3960 default:
3961 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3962 }
3963 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3964}
3965
3966
3967/**
3968 * Clears all users of a page.
3969 */
3970static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3971{
3972 /*
3973 * Free all the user records.
3974 */
3975 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3976
3977 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3978 uint16_t i = pPage->iUserHead;
3979 while (i != NIL_PGMPOOL_USER_INDEX)
3980 {
3981 /* Clear the entry in the user table. */
3982 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3983
3984 /* Free it. */
3985 const uint16_t iNext = paUsers[i].iNext;
3986 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3987 paUsers[i].iNext = pPool->iUserFreeHead;
3988 pPool->iUserFreeHead = i;
3989
3990 /* Next. */
3991 i = iNext;
3992 }
3993 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3994}
3995
3996
3997/**
3998 * Allocates a new physical cross reference extent.
3999 *
4000 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
4001 * @param pVM The cross context VM structure.
4002 * @param piPhysExt Where to store the phys ext index.
4003 */
4004PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
4005{
4006 PGM_LOCK_ASSERT_OWNER(pVM);
4007 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4008 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4009 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4010 {
4011 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4012 return NULL;
4013 }
4014 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4015 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4016 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4017 *piPhysExt = iPhysExt;
4018 return pPhysExt;
4019}
4020
4021
4022/**
4023 * Frees a physical cross reference extent.
4024 *
4025 * @param pVM The cross context VM structure.
4026 * @param iPhysExt The extent to free.
4027 */
4028void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
4029{
4030 PGM_LOCK_ASSERT_OWNER(pVM);
4031 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4032 Assert(iPhysExt < pPool->cMaxPhysExts);
4033 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4034 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4035 {
4036 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4037 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4038 }
4039 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4040 pPool->iPhysExtFreeHead = iPhysExt;
4041}
4042
4043
4044/**
4045 * Frees a list of physical cross reference extents.
4046 *
4047 * @param pVM The cross context VM structure.
4048 * @param iPhysExt The index of the first extent in the list to free.
4049 */
4050void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
4051{
4052 PGM_LOCK_ASSERT_OWNER(pVM);
4053 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4054
4055 const uint16_t iPhysExtStart = iPhysExt;
4056 PPGMPOOLPHYSEXT pPhysExt;
4057 do
4058 {
4059 Assert(iPhysExt < pPool->cMaxPhysExts);
4060 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4061 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4062 {
4063 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4064 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4065 }
4066
4067 /* next */
4068 iPhysExt = pPhysExt->iNext;
4069 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4070
4071 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4072 pPool->iPhysExtFreeHead = iPhysExtStart;
4073}
4074
4075
4076/**
4077 * Insert a reference into a list of physical cross reference extents.
4078 *
4079 * @returns The new tracking data for PGMPAGE.
4080 *
4081 * @param pVM The cross context VM structure.
4082 * @param iPhysExt The physical extent index of the list head.
4083 * @param iShwPT The shadow page table index.
4084 * @param iPte Page table entry
4085 *
4086 */
4087static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4088{
4089 PGM_LOCK_ASSERT_OWNER(pVM);
4090 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4091 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4092
4093 /*
4094 * Special common cases.
4095 */
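/* Slot 0 of the head extent is already taken when the extent is allocated
   (see pgmPoolTrackPhysExtAddref below), so probe slots 1 and 2 first. */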
4096 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4097 {
4098 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4099 paPhysExts[iPhysExt].apte[1] = iPte;
4100 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4101 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4102 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4103 }
4104 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4105 {
4106 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4107 paPhysExts[iPhysExt].apte[2] = iPte;
4108 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4109 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4110 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4111 }
4112 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4113
4114 /*
4115 * General treatment.
4116 */
4117 const uint16_t iPhysExtStart = iPhysExt;
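/* Cap the walk at 15 extents; if no free slot turns up by then, drop the
   whole list and mark the page as overflowed so the slow flush path handles
   it later. */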
4118 unsigned cMax = 15;
4119 for (;;)
4120 {
4121 Assert(iPhysExt < pPool->cMaxPhysExts);
4122 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4123 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4124 {
4125 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4126 paPhysExts[iPhysExt].apte[i] = iPte;
4127 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4128 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4129 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4130 }
4131 if (!--cMax)
4132 {
4133 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4134 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4135 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4136 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4137 }
4138
4139 /* advance */
4140 iPhysExt = paPhysExts[iPhysExt].iNext;
4141 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4142 break;
4143 }
4144
4145 /*
4146 * Add another extent to the list.
4147 */
4148 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4149 if (!pNew)
4150 {
4151 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4152 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4153 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4154 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4155 }
4156 pNew->iNext = iPhysExtStart;
4157 pNew->aidx[0] = iShwPT;
4158 pNew->apte[0] = iPte;
4159 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4160 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4161}
4162
4163
4164/**
4165 * Add a reference to guest physical page where extents are in use.
4166 *
4167 * @returns The new tracking data for PGMPAGE.
4168 *
4169 * @param pVM The cross context VM structure.
4170 * @param pPhysPage Pointer to the aPages entry in the ram range.
4171 * @param u16 The ram range flags (top 16-bits).
4172 * @param iShwPT The shadow page table index.
4173 * @param iPte Page table entry
4174 */
4175uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4176{
4177 pgmLock(pVM);
4178 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4179 {
4180 /*
4181 * Convert to extent list.
4182 */
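/* Move the existing single reference (pool index from the tracking word,
   PTE index from the PGMPAGE entry) into slot 0 and put the new reference
   into slot 1 of the freshly allocated extent. */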
4183 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4184 uint16_t iPhysExt;
4185 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4186 if (pPhysExt)
4187 {
4188 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4189 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4190 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4191 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4192 pPhysExt->aidx[1] = iShwPT;
4193 pPhysExt->apte[1] = iPte;
4194 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4195 }
4196 else
4197 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4198 }
4199 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4200 {
4201 /*
4202 * Insert into the extent list.
4203 */
4204 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4205 }
4206 else
4207 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4208 pgmUnlock(pVM);
4209 return u16;
4210}
4211
4212
4213/**
4214 * Clear references to guest physical memory.
4215 *
4216 * @param pPool The pool.
4217 * @param pPage The page.
4218 * @param pPhysPage Pointer to the aPages entry in the ram range.
4219 * @param iPte Shadow PTE index
4220 */
4221void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4222{
4223 PVM pVM = pPool->CTX_SUFF(pVM);
4224 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4225 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4226
4227 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4228 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4229 {
4230 pgmLock(pVM);
4231
4232 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4233 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4234 do
4235 {
4236 Assert(iPhysExt < pPool->cMaxPhysExts);
4237
4238 /*
4239 * Look for the shadow page and check if it's all freed.
4240 */
4241 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4242 {
4243 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4244 && paPhysExts[iPhysExt].apte[i] == iPte)
4245 {
4246 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4247 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4248
4249 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4250 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4251 {
4252 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4253 pgmUnlock(pVM);
4254 return;
4255 }
4256
4257 /* we can free the node. */
4258 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4259 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4260 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4261 {
4262 /* lonely node */
4263 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4264 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4265 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4266 }
4267 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4268 {
4269 /* head */
4270 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4271 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4272 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4273 }
4274 else
4275 {
4276 /* in list */
4277 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4278 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4279 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4280 }
4281 iPhysExt = iPhysExtNext;
4282 pgmUnlock(pVM);
4283 return;
4284 }
4285 }
4286
4287 /* next */
4288 iPhysExtPrev = iPhysExt;
4289 iPhysExt = paPhysExts[iPhysExt].iNext;
4290 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4291
4292 pgmUnlock(pVM);
4293 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4294 }
4295 else /* nothing to do */
4296 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4297}
4298
4299/**
4300 * Clear references to guest physical memory.
4301 *
4302 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4303 * physical address is assumed to be correct, so the linear search can be
4304 * skipped and we can assert at an earlier point.
4305 *
4306 * @param pPool The pool.
4307 * @param pPage The page.
4308 * @param HCPhys The host physical address corresponding to the guest page.
4309 * @param GCPhys The guest physical address corresponding to HCPhys.
4310 * @param iPte Shadow PTE index
4311 */
4312static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4313{
4314 /*
4315 * Lookup the page and check if it checks out before derefing it.
4316 */
4317 PVM pVM = pPool->CTX_SUFF(pVM);
4318 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4319 if (pPhysPage)
4320 {
4321 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4322#ifdef LOG_ENABLED
4323 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4324 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4325#endif
4326 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4327 {
4328 Assert(pPage->cPresent);
4329 Assert(pPool->cPresent);
4330 pPage->cPresent--;
4331 pPool->cPresent--;
4332 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4333 return;
4334 }
4335
4336 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4337 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4338 }
4339 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4340}
4341
4342
4343/**
4344 * Clear references to guest physical memory.
4345 *
4346 * @param pPool The pool.
4347 * @param pPage The page.
4348 * @param HCPhys The host physical address corresponding to the guest page.
4349 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4350 * @param iPte Shadow pte index
4351 */
4352void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4353{
4354 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4355
4356 /*
4357 * Try the hint first.
4358 */
4359 RTHCPHYS HCPhysHinted;
4360 PVM pVM = pPool->CTX_SUFF(pVM);
4361 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4362 if (pPhysPage)
4363 {
4364 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4365 Assert(HCPhysHinted);
4366 if (HCPhysHinted == HCPhys)
4367 {
4368 Assert(pPage->cPresent);
4369 Assert(pPool->cPresent);
4370 pPage->cPresent--;
4371 pPool->cPresent--;
4372 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4373 return;
4374 }
4375 }
4376 else
4377 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4378
4379 /*
4380 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4381 */
4382 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4383 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4384 while (pRam)
4385 {
4386 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4387 while (iPage-- > 0)
4388 {
4389 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4390 {
4391 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4392 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4393 Assert(pPage->cPresent);
4394 Assert(pPool->cPresent);
4395 pPage->cPresent--;
4396 pPool->cPresent--;
4397 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4398 return;
4399 }
4400 }
4401 pRam = pRam->CTX_SUFF(pNext);
4402 }
4403
4404 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4405}
4406
4407
4408/**
4409 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4410 *
4411 * @param pPool The pool.
4412 * @param pPage The page.
4413 * @param pShwPT The shadow page table (mapping of the page).
4414 * @param pGstPT The guest page table.
4415 */
4416DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4417{
4418 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4419 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4420 {
4421 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4422 if (pShwPT->a[i].n.u1Present)
4423 {
4424 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4425 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4426 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4427 if (!pPage->cPresent)
4428 break;
4429 }
4430 }
4431}
4432
4433
4434/**
4435 * Clear references to guest physical memory in a PAE / 32-bit page table.
4436 *
4437 * @param pPool The pool.
4438 * @param pPage The page.
4439 * @param pShwPT The shadow page table (mapping of the page).
4440 * @param pGstPT The guest page table (just a half one).
4441 */
4442DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4443{
4444 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4445 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4446 {
4447 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4448 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4449 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4450 {
4451 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4452 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4453 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4454 if (!pPage->cPresent)
4455 break;
4456 }
4457 }
4458}
4459
4460
4461/**
4462 * Clear references to guest physical memory in a PAE / PAE page table.
4463 *
4464 * @param pPool The pool.
4465 * @param pPage The page.
4466 * @param pShwPT The shadow page table (mapping of the page).
4467 * @param pGstPT The guest page table.
4468 */
4469DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4470{
4471 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4472 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4473 {
4474 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4475 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4476 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4477 {
4478 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4479 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4480 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4481 if (!pPage->cPresent)
4482 break;
4483 }
4484 }
4485}
4486
4487
4488/**
4489 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4490 *
4491 * @param pPool The pool.
4492 * @param pPage The page.
4493 * @param pShwPT The shadow page table (mapping of the page).
4494 */
4495DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4496{
4497 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4498 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4499 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4500 {
4501 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4502 if (pShwPT->a[i].n.u1Present)
4503 {
4504 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4505 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4506 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4507 if (!pPage->cPresent)
4508 break;
4509 }
4510 }
4511}
4512
4513
4514/**
4515 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4516 *
4517 * @param pPool The pool.
4518 * @param pPage The page.
4519 * @param pShwPT The shadow page table (mapping of the page).
4520 */
4521DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4522{
4523 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4524 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4525 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4526 {
4527 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4528 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4529 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4530 {
4531 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4532 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4533 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4534 if (!pPage->cPresent)
4535 break;
4536 }
4537 }
4538}
4539
4540
4541/**
4542 * Clear references to shadowed pages in an EPT page table.
4543 *
4544 * @param pPool The pool.
4545 * @param pPage The page.
4546 * @param pShwPT The shadow page table (mapping of the page).
4548 */
4549DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4550{
4551 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4552 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4553 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4554 {
4555 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4556 if (pShwPT->a[i].n.u1Present)
4557 {
4558 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4559 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4560 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4561 if (!pPage->cPresent)
4562 break;
4563 }
4564 }
4565}
4566
4567
4568/**
4569 * Clear references to shadowed pages in a 32-bit page directory.
4570 *
4571 * @param pPool The pool.
4572 * @param pPage The page.
4573 * @param pShwPD The shadow page directory (mapping of the page).
4574 */
4575DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4576{
4577 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4578 {
4579 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4580 if ( pShwPD->a[i].n.u1Present
4581 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4582 )
4583 {
4584 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4585 if (pSubPage)
4586 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4587 else
4588 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4589 }
4590 }
4591}
4592
4593
4594/**
4595 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4596 *
4597 * @param pPool The pool.
4598 * @param pPage The page.
4599 * @param pShwPD The shadow page directory (mapping of the page).
4600 */
4601DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4602{
4603 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4604 {
4605 if ( pShwPD->a[i].n.u1Present
4606 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4607 {
4608#ifdef PGM_WITH_LARGE_PAGES
4609 if (pShwPD->a[i].b.u1Size)
4610 {
4611 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4612 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4613 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4614 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4615 i);
4616 }
4617 else
4618#endif
4619 {
4620 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4621 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4622 if (pSubPage)
4623 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4624 else
4625 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4626 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4627 }
4628 }
4629 }
4630}
4631
4632
4633/**
4634 * Clear references to shadowed pages in a PAE page directory pointer table.
4635 *
4636 * @param pPool The pool.
4637 * @param pPage The page.
4638 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4639 */
4640DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4641{
4642 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4643 {
4644 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4645 if ( pShwPDPT->a[i].n.u1Present
4646 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4647 )
4648 {
4649 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4650 if (pSubPage)
4651 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4652 else
4653 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4654 }
4655 }
4656}
4657
4658
4659/**
4660 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4661 *
4662 * @param pPool The pool.
4663 * @param pPage The page.
4664 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4665 */
4666DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4667{
4668 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4669 {
4670 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4671 if (pShwPDPT->a[i].n.u1Present)
4672 {
4673 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4674 if (pSubPage)
4675 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4676 else
4677 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4678 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4679 }
4680 }
4681}
4682
4683
4684/**
4685 * Clear references to shadowed pages in a 64-bit level 4 page table.
4686 *
4687 * @param pPool The pool.
4688 * @param pPage The page.
4689 * @param pShwPML4 The shadow PML4 (mapping of the page).
4690 */
4691DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4692{
4693 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4694 {
4695 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4696 if (pShwPML4->a[i].n.u1Present)
4697 {
4698 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4699 if (pSubPage)
4700 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4701 else
4702 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4703 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4704 }
4705 }
4706}
4707
4708
4709/**
4710 * Clear references to shadowed pages in an EPT page directory.
4711 *
4712 * @param pPool The pool.
4713 * @param pPage The page.
4714 * @param pShwPD The shadow page directory (mapping of the page).
4715 */
4716DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4717{
4718 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4719 {
4720 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4721 if (pShwPD->a[i].n.u1Present)
4722 {
4723#ifdef PGM_WITH_LARGE_PAGES
4724 if (pShwPD->a[i].b.u1Size)
4725 {
4726 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4727 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4728 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4729 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4730 i);
4731 }
4732 else
4733#endif
4734 {
4735 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4736 if (pSubPage)
4737 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4738 else
4739 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4740 }
4741 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4742 }
4743 }
4744}
4745
4746
4747/**
4748 * Clear references to shadowed pages in an EPT page directory pointer table.
4749 *
4750 * @param pPool The pool.
4751 * @param pPage The page.
4752 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4753 */
4754DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4755{
4756 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4757 {
4758 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4759 if (pShwPDPT->a[i].n.u1Present)
4760 {
4761 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4762 if (pSubPage)
4763 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4764 else
4765 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4766 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4767 }
4768 }
4769}
4770
4771
4772/**
4773 * Clears all references made by this page.
4774 *
4775 * This includes other shadow pages and GC physical addresses.
4776 *
4777 * @param pPool The pool.
4778 * @param pPage The page.
4779 */
4780static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4781{
4782 /*
4783 * Map the shadow page and take action according to the page kind.
4784 */
4785 PVM pVM = pPool->CTX_SUFF(pVM);
4786 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4787 switch (pPage->enmKind)
4788 {
4789 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4790 {
4791 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4792 void *pvGst;
4793 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4794 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4795 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4796 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4797 break;
4798 }
4799
4800 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4801 {
4802 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4803 void *pvGst;
4804 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4805 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4806 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4807 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4808 break;
4809 }
4810
4811 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4812 {
4813 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4814 void *pvGst;
4815 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4816 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4817 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4818 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4819 break;
4820 }
4821
4822 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4823 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4824 {
4825 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4826 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4827 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4828 break;
4829 }
4830
4831 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4832 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4833 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4834 {
4835 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4836 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4837 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4838 break;
4839 }
4840
4841 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4842 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4843 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4844 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4845 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4846 case PGMPOOLKIND_PAE_PD_PHYS:
4847 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4848 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4849 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4850 break;
4851
4852 case PGMPOOLKIND_32BIT_PD_PHYS:
4853 case PGMPOOLKIND_32BIT_PD:
4854 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4855 break;
4856
4857 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4858 case PGMPOOLKIND_PAE_PDPT:
4859 case PGMPOOLKIND_PAE_PDPT_PHYS:
4860 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4861 break;
4862
4863 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4864 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4865 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4866 break;
4867
4868 case PGMPOOLKIND_64BIT_PML4:
4869 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4870 break;
4871
4872 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4873 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4874 break;
4875
4876 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4877 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4878 break;
4879
4880 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4881 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4882 break;
4883
4884 default:
4885 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4886 }
4887
4888 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4889 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4890 ASMMemZeroPage(pvShw);
4891 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4892 pPage->fZeroed = true;
4893 Assert(!pPage->cPresent);
4894 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4895}
4896
4897
4898/**
4899 * Flushes a pool page.
4900 *
4901 * This moves the page to the free list after removing all user references to it.
4902 *
4903 * @returns VBox status code.
4904 * @retval VINF_SUCCESS on success.
4905 * @param pPool The pool.
4906 * @param pPage The shadow page.
4907 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4908 */
4909int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4910{
4911 PVM pVM = pPool->CTX_SUFF(pVM);
4912 bool fFlushRequired = false;
4913
4914 int rc = VINF_SUCCESS;
4915 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4916 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4917 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4918
4919 /*
4920 * Reject any attempts at flushing any of the special root pages (shall
4921 * not happen).
4922 */
4923 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4924 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4925 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4926 VINF_SUCCESS);
4927
4928 pgmLock(pVM);
4929
4930 /*
4931 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4932 */
4933 if (pgmPoolIsPageLocked(pPage))
4934 {
4935 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4936 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4937 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4938 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4939 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4940 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4941 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4942 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4943 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4944 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4945 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4946 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4947 pgmUnlock(pVM);
4948 return VINF_SUCCESS;
4949 }
4950
4951#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4952 /* Start a subset so we won't run out of mapping space. */
4953 PVMCPU pVCpu = VMMGetCpu(pVM);
4954 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4955#endif
4956
4957 /*
4958 * Mark the page as being in need of an ASMMemZeroPage().
4959 */
4960 pPage->fZeroed = false;
4961
4962#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4963 if (pPage->fDirty)
4964 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4965#endif
4966
4967 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4968 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4969 fFlushRequired = true;
4970
4971 /*
4972 * Clear the page.
4973 */
4974 pgmPoolTrackClearPageUsers(pPool, pPage);
4975 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4976 pgmPoolTrackDeref(pPool, pPage);
4977 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4978
4979 /*
4980 * Flush it from the cache.
4981 */
4982 pgmPoolCacheFlushPage(pPool, pPage);
4983
4984#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4985 /* Heavy stuff done. */
4986 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4987#endif
4988
4989 /*
4990 * Deregister the monitoring.
4991 */
4992 if (pPage->fMonitored)
4993 rc = pgmPoolMonitorFlush(pPool, pPage);
4994
4995 /*
4996 * Free the page.
4997 */
4998 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4999 pPage->iNext = pPool->iFreeHead;
5000 pPool->iFreeHead = pPage->idx;
5001 pPage->enmKind = PGMPOOLKIND_FREE;
5002 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5003 pPage->GCPhys = NIL_RTGCPHYS;
5004 pPage->fReusedFlushPending = false;
5005
5006 pPool->cUsedPages--;
5007
5008 /* Flush the TLBs of all VCPUs if required. */
5009 if ( fFlushRequired
5010 && fFlush)
5011 {
5012 PGM_INVL_ALL_VCPU_TLBS(pVM);
5013 }
5014
5015 pgmUnlock(pVM);
5016 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5017 return rc;
5018}
5019
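/* Illustrative caller pattern (a sketch mirroring pgmPoolFreeByPage() below,
 * not an additional API): drop the user reference first, then flush the page
 * unless it is cached:
 *
 *     pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
 *     if (!pPage->fCached)
 *         pgmPoolFlushPage(pPool, pPage);
 *
 * Passing fFlush=false defers the cross-VCPU TLB flush and is only safe when
 * the caller issues PGM_INVL_ALL_VCPU_TLBS() (or an equivalent) itself. */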
5020
5021/**
5022 * Frees a usage of a pool page.
5023 *
5024 * The caller is responsible for updating the user table so that it no longer
5025 * references the shadow page.
5026 *
5027 * @param pPool The pool.
5028 * @param pPage The shadow page.
5029 * @param iUser The shadow page pool index of the user table.
5030 * NIL_PGMPOOL_IDX for root pages.
5031 * @param iUserTable The index into the user table (shadowed). Ignored if
5032 * root page.
5033 */
5034void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5035{
5036 PVM pVM = pPool->CTX_SUFF(pVM);
5037
5038 STAM_PROFILE_START(&pPool->StatFree, a);
5039 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5040 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5041 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5042
5043 pgmLock(pVM);
5044 if (iUser != NIL_PGMPOOL_IDX)
5045 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5046 if (!pPage->fCached)
5047 pgmPoolFlushPage(pPool, pPage);
5048 pgmUnlock(pVM);
5049 STAM_PROFILE_STOP(&pPool->StatFree, a);
5050}
5051
5052
5053/**
5054 * Makes one or more free pages available.
5055 *
5056 * @returns VBox status code.
5057 * @retval VINF_SUCCESS on success.
5058 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5059 *
5060 * @param pPool The pool.
5061 * @param enmKind The page table kind.
5062 * @param iUser The user of the page.
5063 */
5064static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5065{
5066 PVM pVM = pPool->CTX_SUFF(pVM);
5067 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5068 NOREF(enmKind);
5069
5070 /*
5071 * If the pool isn't fully grown yet, expand it.
5072 */
5073 if ( pPool->cCurPages < pPool->cMaxPages
5074#if defined(IN_RC)
5075 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
5076 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5077 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
5078#endif
5079 )
5080 {
5081 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5082#ifdef IN_RING3
5083 int rc = PGMR3PoolGrow(pVM);
5084#else
5085 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
5086#endif
5087 if (RT_FAILURE(rc))
5088 return rc;
5089 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5090 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5091 return VINF_SUCCESS;
5092 }
5093
5094 /*
5095 * Free one cached page.
5096 */
5097 return pgmPoolCacheFreeOne(pPool, iUser);
5098}
5099
5100
5101/**
5102 * Allocates a page from the pool.
5103 *
5104 * This page may actually be a cached page and not in need of any processing
5105 * on the caller's part.
5106 *
5107 * @returns VBox status code.
5108 * @retval VINF_SUCCESS if a NEW page was allocated.
5109 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5110 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5111 *
5112 * @param pVM The cross context VM structure.
5113 * @param GCPhys The GC physical address of the page we're going to shadow.
5114 * For 4MB and 2MB PD entries, it's the first address the
5115 * shadow PT is covering.
5116 * @param enmKind The kind of mapping.
5117 * @param enmAccess Access type for the mapping (only relevant for big pages)
5118 * @param fA20Enabled Whether the A20 gate is enabled or not.
5119 * @param iUser The shadow page pool index of the user table. Root
5120 * pages should pass NIL_PGMPOOL_IDX.
5121 * @param iUserTable The index into the user table (shadowed). Ignored for
5122 * root pages (iUser == NIL_PGMPOOL_IDX).
5123 * @param fLockPage Lock the page
5124 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5125 */
5126int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5127 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5128{
5129 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5130 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5131 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5132 *ppPage = NULL;
5133 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5134 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5135 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5136
5137 pgmLock(pVM);
5138
5139 if (pPool->fCacheEnabled)
5140 {
5141 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5142 if (RT_SUCCESS(rc2))
5143 {
5144 if (fLockPage)
5145 pgmPoolLockPage(pPool, *ppPage);
5146 pgmUnlock(pVM);
5147 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5148 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5149 return rc2;
5150 }
5151 }
5152
5153 /*
5154 * Allocate a new one.
5155 */
5156 int rc = VINF_SUCCESS;
5157 uint16_t iNew = pPool->iFreeHead;
5158 if (iNew == NIL_PGMPOOL_IDX)
5159 {
5160 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5161 if (RT_FAILURE(rc))
5162 {
5163 pgmUnlock(pVM);
5164 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5165 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5166 return rc;
5167 }
5168 iNew = pPool->iFreeHead;
5169 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5170 }
5171
5172 /* unlink the free head */
5173 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5174 pPool->iFreeHead = pPage->iNext;
5175 pPage->iNext = NIL_PGMPOOL_IDX;
5176
5177 /*
5178 * Initialize it.
5179 */
5180 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5181 pPage->enmKind = enmKind;
5182 pPage->enmAccess = enmAccess;
5183 pPage->GCPhys = GCPhys;
5184 pPage->fA20Enabled = fA20Enabled;
5185 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5186 pPage->fMonitored = false;
5187 pPage->fCached = false;
5188 pPage->fDirty = false;
5189 pPage->fReusedFlushPending = false;
5190 pPage->cModifications = 0;
5191 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5192 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5193 pPage->cPresent = 0;
5194 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5195 pPage->idxDirtyEntry = 0;
5196 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5197 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5198 pPage->cLastAccessHandler = 0;
5199 pPage->cLocked = 0;
5200# ifdef VBOX_STRICT
5201 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5202# endif
5203
5204 /*
5205 * Insert into the tracking and cache. If this fails, free the page.
5206 */
5207 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5208 if (RT_FAILURE(rc3))
5209 {
5210 pPool->cUsedPages--;
5211 pPage->enmKind = PGMPOOLKIND_FREE;
5212 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5213 pPage->GCPhys = NIL_RTGCPHYS;
5214 pPage->iNext = pPool->iFreeHead;
5215 pPool->iFreeHead = pPage->idx;
5216 pgmUnlock(pVM);
5217 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5218 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5219 return rc3;
5220 }
5221
5222 /*
5223 * Commit the allocation, clear the page and return.
5224 */
5225#ifdef VBOX_WITH_STATISTICS
5226 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5227 pPool->cUsedPagesHigh = pPool->cUsedPages;
5228#endif
5229
5230 if (!pPage->fZeroed)
5231 {
5232 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5233 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5234 ASMMemZeroPage(pv);
5235 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5236 }
5237
5238 *ppPage = pPage;
5239 if (fLockPage)
5240 pgmPoolLockPage(pPool, pPage);
5241 pgmUnlock(pVM);
5242 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5243 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5244 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5245 return rc;
5246}
5247
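/* Usage sketch (illustrative only; GCPhysPT, fA20Enabled, pShwPde and iPdPte
 * are hypothetical caller-side names, not identifiers defined in this file):
 *
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                           PGMPOOLACCESS_DONTCARE, fA20Enabled,
 *                           pShwPde->idx, iPdPte, false, &pShwPage);
 *
 * The trailing 'false' is fLockPage. VINF_SUCCESS hands out a freshly zeroed
 * page the caller must populate, VINF_PGM_CACHED_PAGE returns a table that
 * already holds valid entries, and VERR_PGM_POOL_FLUSHED means the pool was
 * flushed and the operation has to be restarted. */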
5248
5249/**
5250 * Frees a usage of a pool page.
5251 *
5252 * @param pVM The cross context VM structure.
5253 * @param HCPhys The HC physical address of the shadow page.
5254 * @param iUser The shadow page pool index of the user table.
5255 * NIL_PGMPOOL_IDX if root page.
5256 * @param iUserTable The index into the user table (shadowed). Ignored if
5257 * root page.
5258 */
5259void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5260{
5261 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5262 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5263 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5264}
5265
5266
5267/**
5268 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5269 *
5270 * @returns Pointer to the shadow page structure.
5271 * @param pPool The pool.
5272 * @param HCPhys The HC physical address of the shadow page.
5273 */
5274PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5275{
5276 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5277
5278 /*
5279 * Look up the page.
5280 */
5281 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5282
5283 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5284 return pPage;
5285}
5286
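/* Note (sketch, not from the original source): the AVL tree is keyed on the
 * page-aligned host physical address, so callers may pass an unaligned HCPhys
 * (e.g. a raw shadow PTE/PDE value); X86_PTE_PAE_PG_MASK strips the offset and
 * high attribute bits before the lookup. */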
5287
5288/**
5289 * Internal worker for finding a page for debugging purposes, no assertions.
5290 *
5291 * @returns Pointer to the shadow page structure. NULL if not found.
5292 * @param pPool The pool.
5293 * @param HCPhys The HC physical address of the shadow page.
5294 */
5295PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5296{
5297 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5298 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5299}
5300
5301#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5302
5303/**
5304 * Flush the specified page if present
5305 *
5306 * @param pVM The cross context VM structure.
5307 * @param GCPhys Guest physical address of the page to flush
5308 */
5309void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5310{
5311 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5312
5313 VM_ASSERT_EMT(pVM);
5314
5315 /*
5316 * Look up the GCPhys in the hash.
5317 */
5318 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5319 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5320 if (i == NIL_PGMPOOL_IDX)
5321 return;
5322
5323 do
5324 {
5325 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5326 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5327 {
5328 switch (pPage->enmKind)
5329 {
5330 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5331 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5332 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5333 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5334 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5335 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5336 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5337 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5338 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5339 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5340 case PGMPOOLKIND_64BIT_PML4:
5341 case PGMPOOLKIND_32BIT_PD:
5342 case PGMPOOLKIND_PAE_PDPT:
5343 {
5344 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5345#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5346 if (pPage->fDirty)
5347 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5348 else
5349#endif
5350 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5351 Assert(!pgmPoolIsPageLocked(pPage));
5352 pgmPoolMonitorChainFlush(pPool, pPage);
5353 return;
5354 }
5355
5356 /* ignore, no monitoring. */
5357 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5358 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5359 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5360 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5361 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5362 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5363 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5364 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5365 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5366 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5367 case PGMPOOLKIND_ROOT_NESTED:
5368 case PGMPOOLKIND_PAE_PD_PHYS:
5369 case PGMPOOLKIND_PAE_PDPT_PHYS:
5370 case PGMPOOLKIND_32BIT_PD_PHYS:
5371 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5372 break;
5373
5374 default:
5375 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5376 }
5377 }
5378
5379 /* next */
5380 i = pPage->iNext;
5381 } while (i != NIL_PGMPOOL_IDX);
5382 return;
5383}
5384
5385#endif /* IN_RING3 */
5386#ifdef IN_RING3
5387
5388/**
5389 * Reset CPU on hot plugging.
5390 *
5391 * @param pVM The cross context VM structure.
5392 * @param pVCpu The cross context virtual CPU structure.
5393 */
5394void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5395{
5396 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5397
5398 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5399 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5400 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5401}
5402
5403
5404/**
5405 * Flushes the entire cache.
5406 *
5407 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5408 * this and will execute the CR3 flush.
5409 *
5410 * @param pVM The cross context VM structure.
5411 */
5412void pgmR3PoolReset(PVM pVM)
5413{
5414 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5415
5416 PGM_LOCK_ASSERT_OWNER(pVM);
5417 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5418 LogFlow(("pgmR3PoolReset:\n"));
5419
5420 /*
5421 * If there are no pages in the pool, there is nothing to do.
5422 */
5423 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5424 {
5425 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5426 return;
5427 }
5428
5429 /*
5430 * Exit the shadow mode since we're going to clear everything,
5431 * including the root page.
5432 */
5433 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5434 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5435
5436 /*
5437 * Nuke the free list and reinsert all pages into it.
5438 */
5439 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5440 {
5441 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5442
5443 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5444 if (pPage->fMonitored)
5445 pgmPoolMonitorFlush(pPool, pPage);
5446 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5447 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5448 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5449 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5450 pPage->cModifications = 0;
5451 pPage->GCPhys = NIL_RTGCPHYS;
5452 pPage->enmKind = PGMPOOLKIND_FREE;
5453 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5454 Assert(pPage->idx == i);
5455 pPage->iNext = i + 1;
5456 pPage->fA20Enabled = true;
5457 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5458 pPage->fSeenNonGlobal = false;
5459 pPage->fMonitored = false;
5460 pPage->fDirty = false;
5461 pPage->fCached = false;
5462 pPage->fReusedFlushPending = false;
5463 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5464 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5465 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5466 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5467 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5468 pPage->cLastAccessHandler = 0;
5469 pPage->cLocked = 0;
5470#ifdef VBOX_STRICT
5471 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5472#endif
5473 }
5474 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5475 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5476 pPool->cUsedPages = 0;
5477
5478 /*
5479 * Zap and reinitialize the user records.
5480 */
5481 pPool->cPresent = 0;
5482 pPool->iUserFreeHead = 0;
5483 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5484 const unsigned cMaxUsers = pPool->cMaxUsers;
5485 for (unsigned i = 0; i < cMaxUsers; i++)
5486 {
5487 paUsers[i].iNext = i + 1;
5488 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5489 paUsers[i].iUserTable = 0xfffffffe;
5490 }
5491 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5492
5493 /*
5494 * Clear all the GCPhys links and rebuild the phys ext free list.
5495 */
5496 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5497 pRam;
5498 pRam = pRam->CTX_SUFF(pNext))
5499 {
5500 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5501 while (iPage-- > 0)
5502 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5503 }
5504
5505 pPool->iPhysExtFreeHead = 0;
5506 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5507 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5508 for (unsigned i = 0; i < cMaxPhysExts; i++)
5509 {
5510 paPhysExts[i].iNext = i + 1;
5511 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5512 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5513 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5514 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5515 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5516 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5517 }
5518 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5519
5520 /*
5521 * Just zap the modified list.
5522 */
5523 pPool->cModifiedPages = 0;
5524 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5525
5526 /*
5527 * Clear the GCPhys hash and the age list.
5528 */
5529 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5530 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5531 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5532 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5533
5534#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5535 /* Clear all dirty pages. */
5536 pPool->idxFreeDirtyPage = 0;
5537 pPool->cDirtyPages = 0;
5538 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5539 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5540#endif
5541
5542 /*
5543 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5544 */
5545 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5546 {
5547 /*
5548 * Re-enter the shadowing mode and assert Sync CR3 FF.
5549 */
5550 PVMCPU pVCpu = &pVM->aCpus[i];
5551 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5552 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5553 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5554 }
5555
5556 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5557}
5558
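/* Follow-up sketch (illustrative; not part of the original source): after
 * pgmR3PoolReset() every VCPU has VMCPU_FF_PGM_SYNC_CR3 and VMCPU_FF_TLB_FLUSH
 * set, so the caller must let the normal forced-action handling rebuild the
 * shadow paging structures (typically a full PGMSyncCR3 pass) before guest
 * execution resumes. */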
5559#endif /* IN_RING3 */
5560
5561#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5562/**
5563 * Stringifies a PGMPOOLKIND value.
5564 */
5565static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5566{
5567 switch ((PGMPOOLKIND)enmKind)
5568 {
5569 case PGMPOOLKIND_INVALID:
5570 return "PGMPOOLKIND_INVALID";
5571 case PGMPOOLKIND_FREE:
5572 return "PGMPOOLKIND_FREE";
5573 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5574 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5575 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5576 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5577 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5578 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5579 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5580 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5581 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5582 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5583 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5584 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5585 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5586 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5587 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5588 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5589 case PGMPOOLKIND_32BIT_PD:
5590 return "PGMPOOLKIND_32BIT_PD";
5591 case PGMPOOLKIND_32BIT_PD_PHYS:
5592 return "PGMPOOLKIND_32BIT_PD_PHYS";
5593 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5594 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5595 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5596 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5597 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5598 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5599 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5600 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5601 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5602 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5603 case PGMPOOLKIND_PAE_PD_PHYS:
5604 return "PGMPOOLKIND_PAE_PD_PHYS";
5605 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5606 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5607 case PGMPOOLKIND_PAE_PDPT:
5608 return "PGMPOOLKIND_PAE_PDPT";
5609 case PGMPOOLKIND_PAE_PDPT_PHYS:
5610 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5611 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5612 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5613 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5614 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5615 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5616 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5617 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5618 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5619 case PGMPOOLKIND_64BIT_PML4:
5620 return "PGMPOOLKIND_64BIT_PML4";
5621 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5622 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5623 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5624 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5625 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5626 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5627 case PGMPOOLKIND_ROOT_NESTED:
5628 return "PGMPOOLKIND_ROOT_NESTED";
5629 }
5630 return "Unknown kind!";
5631}
5632#endif /* LOG_ENABLED || VBOX_STRICT */
5633