VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@65901

Last change on this file since 65901 was 65901, checked in by vboxsync, 8 years ago

Reapplied r113097 (backed out in r113137): pgmR3PoolReset: A few missing members (harmless).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 216.1 KB
1/* $Id: PGMAllPool.cpp 65901 2017-02-28 14:56:52Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hm_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*********************************************************************************************************************************
44* Internal Functions *
45*********************************************************************************************************************************/
46RT_C_DECLS_BEGIN
47#if 0 /* unused */
48DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
49DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
50#endif /* unused */
51static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
54static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
55#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
56static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
57#endif
58#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
59static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
60#endif
61
62int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
63PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
64void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
65void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
66
67RT_C_DECLS_END
68
69
70#if 0 /* unused */
71/**
72 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
73 *
74 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
75 * @param enmKind The page kind.
76 */
77DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
78{
79 switch (enmKind)
80 {
81 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
83 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
84 return true;
85 default:
86 return false;
87 }
88}
89#endif /* unused */
90
91
92/**
93 * Flushes a chain of pages sharing the same access monitor.
94 *
95 * @returns VBox status code suitable for scheduling.
96 * @param pPool The pool.
97 * @param pPage A page in the chain.
98 * @todo VBOXSTRICTRC
99 */
100int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
101{
102 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
103
104 /*
105 * Find the list head.
106 */
107 uint16_t idx = pPage->idx;
108 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
109 {
110 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
111 {
112 idx = pPage->iMonitoredPrev;
113 Assert(idx != pPage->idx);
114 pPage = &pPool->aPages[idx];
115 }
116 }
117
118 /*
119 * Iterate the list flushing each shadow page.
120 */
121 int rc = VINF_SUCCESS;
122 for (;;)
123 {
124 idx = pPage->iMonitoredNext;
125 Assert(idx != pPage->idx);
126 if (pPage->idx >= PGMPOOL_IDX_FIRST)
127 {
128 int rc2 = pgmPoolFlushPage(pPool, pPage);
129 AssertRC(rc2);
130 }
131 /* next */
132 if (idx == NIL_PGMPOOL_IDX)
133 break;
134 pPage = &pPool->aPages[idx];
135 }
136 return rc;
137}
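/* Editor's note (illustrative sketch, not part of the original source): the monitored chain
 * is the doubly linked list formed by the iMonitoredPrev/iMonitoredNext indices into
 * pPool->aPages[], terminated by NIL_PGMPOOL_IDX at both ends.  A caller holding any member
 * of the chain can flush the whole thing in one go:
 *
 *     int rc = pgmPoolMonitorChainFlush(pPool, pPage); // walks back to the head first
 *     AssertRC(rc);
 */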
138
139
140/**
141 * Wrapper for getting the current context pointer to the entry being modified.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pVM The cross context VM structure.
145 * @param pvDst Destination address
146 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
147 * on the context (e.g. \#PF in R0 & RC).
148 * @param GCPhysSrc The source guest physical address.
149 * @param cb Size of data to read
150 */
151DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
152{
153#if defined(IN_RING3)
154 NOREF(pVM); NOREF(GCPhysSrc);
155 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
156 return VINF_SUCCESS;
157#else
158 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
159 NOREF(pvSrc);
160 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
161#endif
162}
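/* Editor's note (illustrative arithmetic): the & ~(cb - 1) masking above aligns the read to
 * the size of the entry being modified.  E.g. for an 8 byte PAE entry (cb = 8) and
 * GCPhysSrc = 0x00012345, the read starts at 0x00012345 & ~(RTGCPHYS)7 = 0x00012340.
 */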
163
164
165/**
166 * Process shadow entries before they are changed by the guest.
167 *
168 * For PT entries we will clear them. For PD entries, we'll simply check
169 * for mapping conflicts and set the SyncCR3 FF if found.
170 *
171 * @param pVCpu The cross context virtual CPU structure.
172 * @param pPool The pool.
173 * @param pPage The head page.
174 * @param GCPhysFault The guest physical fault address.
175 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
176 * depending on the context (e.g. \#PF in R0 & RC).
177 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
178 */
179static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
180 void const *pvAddress, unsigned cbWrite)
181{
182 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
183 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
184 PVM pVM = pPool->CTX_SUFF(pVM);
185 NOREF(pVCpu);
186
187 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
188 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
189
190 for (;;)
191 {
192 union
193 {
194 void *pv;
195 PX86PT pPT;
196 PPGMSHWPTPAE pPTPae;
197 PX86PD pPD;
198 PX86PDPAE pPDPae;
199 PX86PDPT pPDPT;
200 PX86PML4 pPML4;
201 } uShw;
202
203 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
204 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
205
206 uShw.pv = NULL;
207 switch (pPage->enmKind)
208 {
209 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
210 {
211 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
212 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
213 const unsigned iShw = off / sizeof(X86PTE);
214 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
215 if (uShw.pPT->a[iShw].n.u1Present)
216 {
217 X86PTE GstPte;
218
219 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
220 AssertRC(rc);
221 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
222 pgmPoolTracDerefGCPhysHint(pPool, pPage,
223 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
224 GstPte.u & X86_PTE_PG_MASK,
225 iShw);
226 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
227 }
228 break;
229 }
230
231 /* page/2 sized */
232 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
233 {
234 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
235 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
236 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
237 {
238 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
239 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
240 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
241 {
242 X86PTE GstPte;
243 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
244 AssertRC(rc);
245
246 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
247 pgmPoolTracDerefGCPhysHint(pPool, pPage,
248 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
249 GstPte.u & X86_PTE_PG_MASK,
250 iShw);
251 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
252 }
253 }
254 break;
255 }
256
257 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
258 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
259 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
260 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
261 {
262 unsigned iGst = off / sizeof(X86PDE);
263 unsigned iShwPdpt = iGst / 256;
264 unsigned iShw = (iGst % 256) * 2;
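 /* Editor's note (illustrative): a 32-bit guest PD has 1024 entries covering 4 MB each,
    while each of the four shadow PAE PDs has 512 entries covering 2 MB each.  E.g.
    iGst = 300 gives iShwPdpt = 300 / 256 = 1 (the second shadow PD) and
    iShw = (300 % 256) * 2 = 88, the first of the two PAE PDEs shadowing that guest PDE. */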
265 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
266
267 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
268 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
269 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
270 {
271 for (unsigned i = 0; i < 2; i++)
272 {
273# ifdef VBOX_WITH_RAW_MODE_NOT_R0
274 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
275 {
276 Assert(pgmMapAreMappingsEnabled(pVM));
277 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
278 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
279 break;
280 }
281# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
282 if (uShw.pPDPae->a[iShw+i].n.u1Present)
283 {
284 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
285 pgmPoolFree(pVM,
286 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
287 pPage->idx,
288 iShw + i);
289 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
290 }
291
292 /* paranoia / a bit assumptive. */
293 if ( (off & 3)
294 && (off & 3) + cbWrite > 4)
295 {
296 const unsigned iShw2 = iShw + 2 + i;
297 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
298 {
299# ifdef VBOX_WITH_RAW_MODE_NOT_R0
300 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
301 {
302 Assert(pgmMapAreMappingsEnabled(pVM));
303 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
304 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
305 break;
306 }
307# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
308 if (uShw.pPDPae->a[iShw2].n.u1Present)
309 {
310 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
311 pgmPoolFree(pVM,
312 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
313 pPage->idx,
314 iShw2);
315 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
316 }
317 }
318 }
319 }
320 }
321 break;
322 }
323
324 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
325 {
326 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
327 const unsigned iShw = off / sizeof(X86PTEPAE);
328 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
329 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
330 {
331 X86PTEPAE GstPte;
332 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
333 AssertRC(rc);
334
335 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
336 pgmPoolTracDerefGCPhysHint(pPool, pPage,
337 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
338 GstPte.u & X86_PTE_PAE_PG_MASK,
339 iShw);
340 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
341 }
342
343 /* paranoia / a bit assumptive. */
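 /* Editor's note (illustrative): an unaligned write can straddle two 8 byte PAE entries;
    e.g. (off & 7) == 4 with cbWrite == 8 touches the last four bytes of entry iShw and the
    first four bytes of entry iShw + 1, which is what the iShw2 handling below covers. */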
344 if ( (off & 7)
345 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
346 {
347 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
348 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
349
350 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
351 {
352 X86PTEPAE GstPte;
353 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
354 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
355 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
356 AssertRC(rc);
357 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
358 pgmPoolTracDerefGCPhysHint(pPool, pPage,
359 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
360 GstPte.u & X86_PTE_PAE_PG_MASK,
361 iShw2);
362 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
363 }
364 }
365 break;
366 }
367
368 case PGMPOOLKIND_32BIT_PD:
369 {
370 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
371 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
372
373 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
374 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
375# ifdef VBOX_WITH_RAW_MODE_NOT_R0
376 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
377 {
378 Assert(pgmMapAreMappingsEnabled(pVM));
379 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
380 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
381 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
382 break;
383 }
384 else
385# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
386 {
387 if (uShw.pPD->a[iShw].n.u1Present)
388 {
389 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
390 pgmPoolFree(pVM,
391 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
392 pPage->idx,
393 iShw);
394 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
395 }
396 }
397 /* paranoia / a bit assumptive. */
398 if ( (off & 3)
399 && (off & 3) + cbWrite > sizeof(X86PTE))
400 {
401 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
402 if ( iShw2 != iShw
403 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
404 {
405# ifdef VBOX_WITH_RAW_MODE_NOT_R0
406 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
407 {
408 Assert(pgmMapAreMappingsEnabled(pVM));
409 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
410 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
411 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
412 break;
413 }
414# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
415 if (uShw.pPD->a[iShw2].n.u1Present)
416 {
417 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
418 pgmPoolFree(pVM,
419 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
420 pPage->idx,
421 iShw2);
422 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
423 }
424 }
425 }
426#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
427 if ( uShw.pPD->a[iShw].n.u1Present
428 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
429 {
430 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
431# ifdef IN_RC /* TLB load - we're pushing things a bit... */
432 ASMProbeReadByte(pvAddress);
433# endif
434 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
435 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
436 }
437#endif
438 break;
439 }
440
441 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
442 {
443 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
444 const unsigned iShw = off / sizeof(X86PDEPAE);
445 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
446#ifdef VBOX_WITH_RAW_MODE_NOT_R0
447 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
448 {
449 Assert(pgmMapAreMappingsEnabled(pVM));
450 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
451 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
452 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
453 break;
454 }
455#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
456 /*
457 * Causes trouble when the guest uses a PDE to refer to the whole page table level
458 * structure. (Invalidate here; faults later on when it tries to change the page
459 * table entries -> recheck; probably only applies to the RC case.)
460 */
461#ifdef VBOX_WITH_RAW_MODE_NOT_R0
462 else
463#endif
464 {
465 if (uShw.pPDPae->a[iShw].n.u1Present)
466 {
467 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
468 pgmPoolFree(pVM,
469 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
470 pPage->idx,
471 iShw);
472 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
473 }
474 }
475 /* paranoia / a bit assumptive. */
476 if ( (off & 7)
477 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
478 {
479 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
480 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
481
482#ifdef VBOX_WITH_RAW_MODE_NOT_R0
483 if ( iShw2 != iShw
484 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
485 {
486 Assert(pgmMapAreMappingsEnabled(pVM));
487 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
488 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
489 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
490 break;
491 }
492 else
493#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
494 if (uShw.pPDPae->a[iShw2].n.u1Present)
495 {
496 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
497 pgmPoolFree(pVM,
498 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
499 pPage->idx,
500 iShw2);
501 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
502 }
503 }
504 break;
505 }
506
507 case PGMPOOLKIND_PAE_PDPT:
508 {
509 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
510 /*
511 * Hopefully this doesn't happen very often:
512 * - touching unused parts of the page
513 * - messing with the bits of pd pointers without changing the physical address
514 */
515 /* PDPT roots are not page aligned; 32 byte only! */
516 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
517
518 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
519 const unsigned iShw = offPdpt / sizeof(X86PDPE);
520 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
521 {
522# ifdef VBOX_WITH_RAW_MODE_NOT_R0
523 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
524 {
525 Assert(pgmMapAreMappingsEnabled(pVM));
526 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
527 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
528 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
529 break;
530 }
531 else
532# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
533 if (uShw.pPDPT->a[iShw].n.u1Present)
534 {
535 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
536 pgmPoolFree(pVM,
537 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
538 pPage->idx,
539 iShw);
540 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
541 }
542
543 /* paranoia / a bit assumptive. */
544 if ( (offPdpt & 7)
545 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
546 {
547 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
548 if ( iShw2 != iShw
549 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
550 {
551# ifdef VBOX_WITH_RAW_MODE_NOT_R0
552 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
553 {
554 Assert(pgmMapAreMappingsEnabled(pVM));
555 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
556 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
557 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
558 break;
559 }
560 else
561# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
562 if (uShw.pPDPT->a[iShw2].n.u1Present)
563 {
564 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
565 pgmPoolFree(pVM,
566 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
567 pPage->idx,
568 iShw2);
569 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
570 }
571 }
572 }
573 }
574 break;
575 }
576
577#ifndef IN_RC
578 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
579 {
580 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
581 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
582 const unsigned iShw = off / sizeof(X86PDEPAE);
583 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
584 if (uShw.pPDPae->a[iShw].n.u1Present)
585 {
586 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
587 pgmPoolFree(pVM,
588 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
589 pPage->idx,
590 iShw);
591 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
592 }
593 /* paranoia / a bit assumptive. */
594 if ( (off & 7)
595 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
596 {
597 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
598 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
599
600 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
601 if (uShw.pPDPae->a[iShw2].n.u1Present)
602 {
603 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
604 pgmPoolFree(pVM,
605 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
606 pPage->idx,
607 iShw2);
608 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
609 }
610 }
611 break;
612 }
613
614 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
615 {
616 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
617 /*
618 * Hopefully this doesn't happen very often:
619 * - messing with the bits of pd pointers without changing the physical address
620 */
621 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
622 const unsigned iShw = off / sizeof(X86PDPE);
623 if (uShw.pPDPT->a[iShw].n.u1Present)
624 {
625 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
626 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
627 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
628 }
629 /* paranoia / a bit assumptive. */
630 if ( (off & 7)
631 && (off & 7) + cbWrite > sizeof(X86PDPE))
632 {
633 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
634 if (uShw.pPDPT->a[iShw2].n.u1Present)
635 {
636 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
637 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
638 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
639 }
640 }
641 break;
642 }
643
644 case PGMPOOLKIND_64BIT_PML4:
645 {
646 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
647 /*
648 * Hopefully this doesn't happen very often:
649 * - messing with the bits of pd pointers without changing the physical address
650 */
651 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
652 const unsigned iShw = off / sizeof(X86PDPE);
653 if (uShw.pPML4->a[iShw].n.u1Present)
654 {
655 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
656 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
657 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
658 }
659 /* paranoia / a bit assumptive. */
660 if ( (off & 7)
661 && (off & 7) + cbWrite > sizeof(X86PDPE))
662 {
663 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
664 if (uShw.pPML4->a[iShw2].n.u1Present)
665 {
666 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
667 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
668 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
669 }
670 }
671 break;
672 }
673#endif /* !IN_RC */
674
675 default:
676 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
677 }
678 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
679
680 /* next */
681 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
682 return;
683 pPage = &pPool->aPages[pPage->iMonitoredNext];
684 }
685}
686
687#ifndef IN_RING3
688
689/**
690 * Checks if an access could be a fork operation in progress.
691 *
692 * Meaning that the guest is setting up the parent process for Copy-On-Write.
693 *
694 * @returns true if it's likely that we're forking, otherwise false.
695 * @param pPool The pool.
696 * @param pDis The disassembled instruction.
697 * @param offFault The access offset.
698 */
699DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
700{
701 /*
702 * i386 Linux uses btr to clear X86_PTE_RW.
703 * The functions involved are (2.6.16 source inspection):
704 * clear_bit
705 * ptep_set_wrprotect
706 * copy_one_pte
707 * copy_pte_range
708 * copy_pmd_range
709 * copy_pud_range
710 * copy_page_range
711 * dup_mmap
712 * dup_mm
713 * copy_mm
714 * copy_process
715 * do_fork
716 */
717 if ( pDis->pCurInstr->uOpcode == OP_BTR
718 && !(offFault & 4)
719 /** @todo Validate that the bit index is X86_PTE_RW. */
720 )
721 {
722 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
723 return true;
724 }
725 return false;
726}
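/* Editor's note (illustrative): in the Linux fork path above, btr clears X86_PTE_RW (bit 1),
 * which lives in the low dword of an 8 byte PAE entry; the !(offFault & 4) test approximates
 * that the write targets the low dword, pending the bit-index validation noted in the todo.
 */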
727
728
729/**
730 * Determine whether the page is likely to have been reused.
731 *
732 * @returns true if we consider the page as being reused for a different purpose.
733 * @returns false if we consider it to still be a paging page.
734 * @param pVM The cross context VM structure.
735 * @param pVCpu The cross context virtual CPU structure.
736 * @param pRegFrame Trap register frame.
737 * @param pDis The disassembly info for the faulting instruction.
738 * @param pvFault The fault address.
739 *
740 * @remark The REP prefix check is left to the caller because of STOSD/W.
741 */
742DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
743{
744# ifndef IN_RC
745 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
746 if ( HMHasPendingIrq(pVM)
747 && (pRegFrame->rsp - pvFault) < 32)
748 {
749 /* Fault caused by stack writes while trying to inject an interrupt event. */
750 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
751 return true;
752 }
753# else
754 NOREF(pVM); NOREF(pvFault);
755# endif
756
757 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
758
759 /* Non-supervisor mode write means it's used for something else. */
760 if (CPUMGetGuestCPL(pVCpu) == 3)
761 return true;
762
763 switch (pDis->pCurInstr->uOpcode)
764 {
765 /* call implies the actual push of the return address faulted */
766 case OP_CALL:
767 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
768 return true;
769 case OP_PUSH:
770 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
771 return true;
772 case OP_PUSHF:
773 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
774 return true;
775 case OP_PUSHA:
776 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
777 return true;
778 case OP_FXSAVE:
779 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
780 return true;
781 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
782 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
783 return true;
784 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
785 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
786 return true;
787 case OP_MOVSWD:
788 case OP_STOSWD:
789 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
790 && pRegFrame->rcx >= 0x40
791 )
792 {
793 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
794
795 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
796 return true;
797 }
798 break;
799
800 default:
801 /*
802 * Anything having ESP on the left side means stack writes.
803 */
804 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
805 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
806 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
807 {
808 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
809 return true;
810 }
811 break;
812 }
813
814 /*
815 * Page table updates are very, very unlikely to cross page boundaries,
816 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
817 */
818 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
819 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
820 {
821 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
822 return true;
823 }
824
825 /*
826 * Nobody does an unaligned 8 byte write to a page table, right?
827 */
828 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
829 {
830 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
831 return true;
832 }
833
834 return false;
835}
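/* Editor's note (illustrative): the cross-page check above fires for e.g.
 * pvFault = 0x7fff8ffc with cbWrite = 8, since the write would spill into the next page and
 * ((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT no longer matches pvFault >> X86_PAGE_SHIFT;
 * genuine page table updates practically never do this, so such a write is treated as reuse.
 */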
836
837
838/**
839 * Flushes the page being accessed.
840 *
841 * @returns VBox status code suitable for scheduling.
842 * @param pVM The cross context VM structure.
843 * @param pVCpu The cross context virtual CPU structure.
844 * @param pPool The pool.
845 * @param pPage The pool page (head).
846 * @param pDis The disassembly of the write instruction.
847 * @param pRegFrame The trap register frame.
848 * @param GCPhysFault The fault address as guest physical address.
849 * @param pvFault The fault address.
850 * @todo VBOXSTRICTRC
851 */
852static int pgmRZPoolAccessPfHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
853 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
854{
855 NOREF(pVM); NOREF(GCPhysFault);
856
857 /*
858 * First, do the flushing.
859 */
860 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
861
862 /*
863 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
864 * Must do this in raw mode (!); XP boot will fail otherwise.
865 */
866 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
867 if (rc2 == VINF_SUCCESS)
868 { /* do nothing */ }
869 else if (rc2 == VINF_EM_RESCHEDULE)
870 {
871 if (rc == VINF_SUCCESS)
872 rc = VBOXSTRICTRC_VAL(rc2);
873# ifndef IN_RING3
874 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
875# endif
876 }
877 else if (rc2 == VERR_EM_INTERPRETER)
878 {
879# ifdef IN_RC
880 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
881 {
882 LogFlow(("pgmRZPoolAccessPfHandlerFlush: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
883 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->eip));
884 rc = VINF_SUCCESS;
885 STAM_COUNTER_INC(&pPool->StatMonitorPfRZIntrFailPatch2);
886 }
887 else
888# endif
889 {
890 rc = VINF_EM_RAW_EMULATE_INSTR;
891 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
892 }
893 }
894 else if (RT_FAILURE_NP(rc2))
895 rc = VBOXSTRICTRC_VAL(rc2);
896 else
897 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
898
899 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
900 return rc;
901}
902
903
904/**
905 * Handles the STOSD write accesses.
906 *
907 * @returns VBox status code suitable for scheduling.
908 * @param pVM The cross context VM structure.
909 * @param pPool The pool.
910 * @param pPage The pool page (head).
911 * @param pDis The disassembly of the write instruction.
912 * @param pRegFrame The trap register frame.
913 * @param GCPhysFault The fault address as guest physical address.
914 * @param pvFault The fault address.
915 */
916DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
917 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
918{
919 unsigned uIncrement = pDis->Param1.cb;
920 NOREF(pVM);
921
922 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
923 Assert(pRegFrame->rcx <= 0x20);
924
925# ifdef VBOX_STRICT
926 if (pDis->uOpMode == DISCPUMODE_32BIT)
927 Assert(uIncrement == 4);
928 else
929 Assert(uIncrement == 8);
930# endif
931
932 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
933
934 /*
935 * Increment the modification counter and insert it into the list
936 * of modified pages the first time.
937 */
938 if (!pPage->cModifications++)
939 pgmPoolMonitorModifiedInsert(pPool, pPage);
940
941 /*
942 * Execute REP STOSD.
943 *
944 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
945 * write situation, meaning that it's safe to write here.
946 */
947 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
948 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
949 while (pRegFrame->rcx)
950 {
951# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
952 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
953 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
954 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
955# else
956 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
957# endif
958# ifdef IN_RC
959 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
960# else
961 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
962# endif
963 pu32 += uIncrement;
964 GCPhysFault += uIncrement;
965 pRegFrame->rdi += uIncrement;
966 pRegFrame->rcx--;
967 }
968 pRegFrame->rip += pDis->cbInstr;
969
970 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
971 return VINF_SUCCESS;
972}
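/* Editor's note (illustrative): for a 32-bit 'rep stosd' with ecx = 4, the loop above does
 * four 4 byte guest-physical writes, advancing GCPhysFault and rdi by 4 and decrementing rcx
 * each round, and finally advances rip past the instruction so the guest does not re-execute it.
 */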
973
974
975/**
976 * Handles the simple write accesses.
977 *
978 * @returns VBox status code suitable for scheduling.
979 * @param pVM The cross context VM structure.
980 * @param pVCpu The cross context virtual CPU structure.
981 * @param pPool The pool.
982 * @param pPage The pool page (head).
983 * @param pDis The disassembly of the write instruction.
984 * @param pRegFrame The trap register frame.
985 * @param GCPhysFault The fault address as guest physical address.
986 * @param pvFault The fault address.
987 * @param pfReused Reused state (in/out)
988 */
989DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
990 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
991{
992 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
993 NOREF(pVM);
994 NOREF(pfReused); /* initialized by caller */
995
996 /*
997 * Increment the modification counter and insert it into the list
998 * of modified pages the first time.
999 */
1000 if (!pPage->cModifications++)
1001 pgmPoolMonitorModifiedInsert(pPool, pPage);
1002
1003 /*
1004 * Clear the shadow entries in all pages of the monitored chain. ASSUMES that pvFault is readable.
1005 */
1006# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1007 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1008# endif
1009
1010 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
1011 if (cbWrite <= 8)
1012 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1013 else if (cbWrite <= 16)
1014 {
1015 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1016 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1017 }
1018 else
1019 {
1020 Assert(cbWrite <= 32);
1021 for (uint32_t off = 0; off < cbWrite; off += 8)
1022 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1023 }
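 /* Editor's note (illustrative): a 24 byte write is thus fed to the monitor in three
    8 byte chunks at GCPhysFault + 0, + 8 and + 16, so each pgmPoolMonitorChainChanging
    call spans at most two adjacent shadow entries, which its paranoia checks handle. */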
1024
1025# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1026 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1027# endif
1028
1029 /*
1030 * Interpret the instruction.
1031 */
1032 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
1033 if (RT_SUCCESS(rc))
1034 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1035 else if (rc == VERR_EM_INTERPRETER)
1036 {
1037 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
1038 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1039 rc = VINF_EM_RAW_EMULATE_INSTR;
1040 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
1041 }
1042
1043# if 0 /* experimental code */
1044 if (rc == VINF_SUCCESS)
1045 {
1046 switch (pPage->enmKind)
1047 {
1048 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1049 {
1050 X86PTEPAE GstPte;
1051 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1052 AssertRC(rc);
1053
1054 /* Check the new value written by the guest. If present and with a bogus physical address, then
1055 * it's fairly safe to assume the guest is reusing the PT.
1056 */
1057 if (GstPte.n.u1Present)
1058 {
1059 RTHCPHYS HCPhys = -1;
1060 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1061 if (rc != VINF_SUCCESS)
1062 {
1063 *pfReused = true;
1064 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1065 }
1066 }
1067 break;
1068 }
1069 }
1070 }
1071# endif
1072
1073 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1074 return VBOXSTRICTRC_VAL(rc);
1075}
1076
1077
1078/**
1079 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1080 * \#PF access handler callback for page table pages.}
1081 *
1082 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
1083 */
1084DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVM pVM, PVMCPU pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
1085 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1086{
1087 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1088 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1089 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1090 unsigned cMaxModifications;
1091 bool fForcedFlush = false;
1092 NOREF(uErrorCode);
1093
1094 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1095
1096 pgmLock(pVM);
1097 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1098 {
1099 /* Pool page changed while we were waiting for the lock; ignore. */
1100 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1101 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1102 pgmUnlock(pVM);
1103 return VINF_SUCCESS;
1104 }
1105# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1106 if (pPage->fDirty)
1107 {
1108 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1109 pgmUnlock(pVM);
1110 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1111 }
1112# endif
1113
1114# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1115 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1116 {
1117 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1118 void *pvGst;
1119 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1120 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1121 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1122 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1123 }
1124# endif
1125
1126 /*
1127 * Disassemble the faulting instruction.
1128 */
1129 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1130 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1131 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1132 {
1133 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1134 pgmUnlock(pVM);
1135 return rc;
1136 }
1137
1138 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1139
1140 /*
1141 * We should ALWAYS have the list head as user parameter. This
1142 * is because we use that page to record the changes.
1143 */
1144 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1145
1146# ifdef IN_RING0
1147 /* Maximum nr of modifications depends on the page type. */
1148 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1149 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1150 cMaxModifications = 4;
1151 else
1152 cMaxModifications = 24;
1153# else
1154 cMaxModifications = 48;
1155# endif
1156
1157 /*
1158 * Incremental page table updates should weigh more than random ones.
1159 * (Only applies when started from offset 0)
1160 */
1161 pVCpu->pgm.s.cPoolAccessHandler++;
1162 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1163 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1164 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1165 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1166 {
1167 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1168 Assert(pPage->cModifications < 32000);
1169 pPage->cModifications = pPage->cModifications * 2;
1170 pPage->GCPtrLastAccessHandlerFault = pvFault;
1171 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1172 if (pPage->cModifications >= cMaxModifications)
1173 {
1174 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1175 fForcedFlush = true;
1176 }
1177 }
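 /* Editor's note (illustrative): the doubling above makes sequential page table updates
    reach the flush threshold quickly; e.g. with cMaxModifications = 4 (ring-0, PAE page
    tables) a run of back-to-back writes to consecutive entries pushes cModifications to
    4 within a couple of faults and sets fForcedFlush. */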
1178
1179 if (pPage->cModifications >= cMaxModifications)
1180 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1181
1182 /*
1183 * Check if it's worth dealing with.
1184 */
1185 bool fReused = false;
1186 bool fNotReusedNotForking = false;
1187 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1188 || pgmPoolIsPageLocked(pPage)
1189 )
1190 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1191 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1192 {
1193 /*
1194 * Simple instructions, no REP prefix.
1195 */
1196 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1197 {
1198 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1199 if (fReused)
1200 goto flushPage;
1201
1202 /* A mov instruction to change the first page table entry will be remembered so we can detect
1203 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1204 */
1205 if ( rc == VINF_SUCCESS
1206 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1207 && pDis->pCurInstr->uOpcode == OP_MOV
1208 && (pvFault & PAGE_OFFSET_MASK) == 0)
1209 {
1210 pPage->GCPtrLastAccessHandlerFault = pvFault;
1211 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1212 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1213 /* Make sure we don't kick out a page too quickly. */
1214 if (pPage->cModifications > 8)
1215 pPage->cModifications = 2;
1216 }
1217 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1218 {
1219 /* ignore the 2nd write to this page table entry. */
1220 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1221 }
1222 else
1223 {
1224 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1225 pPage->GCPtrLastAccessHandlerRip = 0;
1226 }
1227
1228 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1229 pgmUnlock(pVM);
1230 return rc;
1231 }
1232
1233 /*
1234 * Windows frequently does small memset() operations (netio test 4k+).
1235 * We have to deal with these or we'll kill the cache and performance.
1236 */
1237 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1238 && !pRegFrame->eflags.Bits.u1DF
1239 && pDis->uOpMode == pDis->uCpuMode
1240 && pDis->uAddrMode == pDis->uCpuMode)
1241 {
1242 bool fValidStosd = false;
1243
1244 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1245 && pDis->fPrefix == DISPREFIX_REP
1246 && pRegFrame->ecx <= 0x20
1247 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1248 && !((uintptr_t)pvFault & 3)
1249 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1250 )
1251 {
1252 fValidStosd = true;
1253 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1254 }
1255 else
1256 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1257 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1258 && pRegFrame->rcx <= 0x20
1259 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1260 && !((uintptr_t)pvFault & 7)
1261 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1262 )
1263 {
1264 fValidStosd = true;
1265 }
1266
1267 if (fValidStosd)
1268 {
1269 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1270 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1271 pgmUnlock(pVM);
1272 return rc;
1273 }
1274 }
1275
1276 /* REP prefix, don't bother. */
1277 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1278 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1279 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1280 fNotReusedNotForking = true;
1281 }
1282
1283# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1284 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1285 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1286 */
1287 if ( pPage->cModifications >= cMaxModifications
1288 && !fForcedFlush
1289 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1290 && ( fNotReusedNotForking
1291 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1292 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1293 )
1294 )
1295 {
1296 Assert(!pgmPoolIsPageLocked(pPage));
1297 Assert(pPage->fDirty == false);
1298
1299 /* Flush any monitored duplicates as we will disable write protection. */
1300 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1301 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1302 {
1303 PPGMPOOLPAGE pPageHead = pPage;
1304
1305 /* Find the monitor head. */
1306 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1307 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1308
1309 while (pPageHead)
1310 {
1311 unsigned idxNext = pPageHead->iMonitoredNext;
1312
1313 if (pPageHead != pPage)
1314 {
1315 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1316 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1317 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1318 AssertRC(rc2);
1319 }
1320
1321 if (idxNext == NIL_PGMPOOL_IDX)
1322 break;
1323
1324 pPageHead = &pPool->aPages[idxNext];
1325 }
1326 }
1327
1328 /* The flushing above might fail for locked pages, so double check. */
1329 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1330 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1331 {
1332 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1333
1334 /* Temporarily allow write access to the page table again. */
1335 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1336 if (rc == VINF_SUCCESS)
1337 {
1338 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1339 AssertMsg(rc == VINF_SUCCESS
1340 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1341 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1342 || rc == VERR_PAGE_NOT_PRESENT,
1343 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1344# ifdef VBOX_STRICT
1345 pPage->GCPtrDirtyFault = pvFault;
1346# endif
1347
1348 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1349 pgmUnlock(pVM);
1350 return rc;
1351 }
1352 }
1353 }
1354# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1355
1356 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1357flushPage:
1358 /*
1359 * Not worth it, so flush it.
1360 *
1361 * If we considered it to be reused, don't go back to ring-3
1362 * to emulate failed instructions since we usually cannot
1363 * interpret them. This may be a bit risky, in which case
1364 * the reuse detection must be fixed.
1365 */
1366 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1367 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1368 && fReused)
1369 {
1370 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1371 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1372 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1373 }
1374 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1375 pgmUnlock(pVM);
1376 return rc;
1377}
1378
1379#endif /* !IN_RING3 */
1380
1381/**
1382 * @callback_method_impl{FNPGMPHYSHANDLER,
1383 * Access handler for shadowed page table pages.}
1384 *
1385 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1386 */
1387PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1388pgmPoolAccessHandler(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1389 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1390{
1391 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1392 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1393 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1394 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1395 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1396
1397 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1398
1399 pgmLock(pVM);
1400
1401#ifdef VBOX_WITH_STATISTICS
1402 /*
1403 * Collect stats on the access.
1404 */
1405 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1406 if (cbBuf <= 16 && cbBuf > 0)
1407 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1408 else if (cbBuf >= 17 && cbBuf < 32)
1409 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1410 else if (cbBuf >= 32 && cbBuf < 64)
1411 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1412 else if (cbBuf >= 64)
1413 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1414
1415 uint8_t cbAlign;
1416 switch (pPage->enmKind)
1417 {
1418 default:
1419 cbAlign = 7;
1420 break;
1421 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1422 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1423 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1424 case PGMPOOLKIND_32BIT_PD:
1425 case PGMPOOLKIND_32BIT_PD_PHYS:
1426 cbAlign = 3;
1427 break;
1428 }
1429 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1430 if ((uint8_t)GCPhys & cbAlign)
1431 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1432#endif
1433
1434 /*
1435 * Make sure the pool page wasn't modified by a different CPU.
1436 */
1437 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1438 {
1439 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1440
1441 /* The max modification count before flushing depends on the context and page type. */
1442#ifdef IN_RING3
1443 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1444#else
1445 uint16_t cMaxModifications;
1446 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1447 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1448 cMaxModifications = 4;
1449 else
1450 cMaxModifications = 24;
1451# ifdef IN_RC
1452 cMaxModifications *= 2; /* traps are cheaper than exits. */
1453# endif
1454#endif
1455
1456 /*
1457 * We don't have to be very sophisticated about this since there are relatively few calls here.
1458 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1459 */
1460 if ( ( pPage->cModifications < cMaxModifications
1461 || pgmPoolIsPageLocked(pPage) )
1462 && enmOrigin != PGMACCESSORIGIN_DEVICE
1463 && cbBuf <= 16)
1464 {
1465 /* Clear the shadow entry. */
1466 if (!pPage->cModifications++)
1467 pgmPoolMonitorModifiedInsert(pPool, pPage);
1468
1469 if (cbBuf <= 8)
1470 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1471 else
1472 {
1473 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1474 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1475 }
1476 }
1477 else
1478 {
1479 /* ASSUME that VERR_PGM_POOL_CLEARED can be ignored here and that FFs will deal with it in due time. */
1480 pgmPoolMonitorChainFlush(pPool, pPage);
1481 }
1482
1483 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1484 }
1485 else
1486 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1487 pgmUnlock(pVM);
1488 return VINF_PGM_HANDLER_DO_DEFAULT;
1489}
1490
1491
1492# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1493
1494# if defined(VBOX_STRICT) && !defined(IN_RING3)
1495
1496/**
1497 * Check references to guest physical memory in a PAE / PAE page table.
1498 *
1499 * @param pPool The pool.
1500 * @param pPage The page.
1501 * @param pShwPT The shadow page table (mapping of the page).
1502 * @param pGstPT The guest page table.
1503 */
1504static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1505{
1506 unsigned cErrors = 0;
1507 int LastRc = -1; /* initialized to shut up gcc */
1508 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1509 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1510 PVM pVM = pPool->CTX_SUFF(pVM);
1511
1512#ifdef VBOX_STRICT
1513 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1514 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1515#endif
1516 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1517 {
1518 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1519 {
1520 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1521 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1522 if ( rc != VINF_SUCCESS
1523 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1524 {
1525 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1526 LastPTE = i;
1527 LastRc = rc;
1528 LastHCPhys = HCPhys;
1529 cErrors++;
1530
1531 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1532 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1533 AssertRC(rc);
1534
1535 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1536 {
1537 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1538
1539 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1540 {
1541 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1542
1543 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1544 {
1545 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1546 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1547 {
1548 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1549 }
1550 }
1551
1552 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1553 }
1554 }
1555 }
1556 }
1557 }
1558 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1559}
1560
1561
1562/**
1563 * Check references to guest physical memory in a PAE / 32-bit page table.
1564 *
1565 * @param pPool The pool.
1566 * @param pPage The page.
1567 * @param pShwPT The shadow page table (mapping of the page).
1568 * @param pGstPT The guest page table.
1569 */
1570static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1571{
1572 unsigned cErrors = 0;
1573 int LastRc = -1; /* initialized to shut up gcc */
1574 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1575 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1576 PVM pVM = pPool->CTX_SUFF(pVM);
1577
1578#ifdef VBOX_STRICT
1579 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1580 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1581#endif
1582 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1583 {
1584 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1585 {
1586 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1587 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1588 if ( rc != VINF_SUCCESS
1589 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1590 {
1591 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1592 LastPTE = i;
1593 LastRc = rc;
1594 LastHCPhys = HCPhys;
1595 cErrors++;
1596
1597 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1598 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1599 AssertRC(rc);
1600
1601 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1602 {
1603 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1604
1605 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1606 {
1607 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1608
1609 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1610 {
1611 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1612 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1613 {
1614 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1615 }
1616 }
1617
1618 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1619 }
1620 }
1621 }
1622 }
1623 }
1624 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1625}
1626
1627# endif /* VBOX_STRICT && !IN_RING3 */
1628
1629/**
1630 * Clear references to guest physical memory in a PAE / PAE page table.
1631 *
1632 * @returns nr of changed PTEs
1633 * @param pPool The pool.
1634 * @param pPage The page.
1635 * @param pShwPT The shadow page table (mapping of the page).
1636 * @param pGstPT The guest page table.
1637 * @param pOldGstPT The old cached guest page table.
1638 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1639 * @param pfFlush Flush reused page table (out)
1640 */
1641DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1642 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1643{
1644 unsigned cChanged = 0;
1645
1646#ifdef VBOX_STRICT
1647 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1648 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1649#endif
1650 *pfFlush = false;
1651
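 /* Walk the shadow PT: entries whose guest PTE still matches the cached copy (pOldGstPT) are left alone, anything else is dereferenced and cleared. */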
1652 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1653 {
1654 /* Check the new value written by the guest. If present and with a bogus physical address, then
1655 * it's fairly safe to assume the guest is reusing the PT.
1656 */
1657 if ( fAllowRemoval
1658 && pGstPT->a[i].n.u1Present)
1659 {
1660 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1661 {
1662 *pfFlush = true;
1663 return ++cChanged;
1664 }
1665 }
1666 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1667 {
1668 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1669 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1670 {
1671#ifdef VBOX_STRICT
1672 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1673 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1674 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1675#endif
1676 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1677 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1678 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1679 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1680
1681 if ( uHostAttr == uGuestAttr
1682 && fHostRW <= fGuestRW)
1683 continue;
1684 }
1685 cChanged++;
1686 /* Something was changed, so flush it. */
1687 Log4(("pgmPoolTrackFlushPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1688 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1689 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1690 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1691 }
1692 }
1693 return cChanged;
1694}
1695
1696
1697/**
1698 * Clear references to guest physical memory in a PAE / 32-bit page table.
1699 *
1700 * @returns nr of changed PTEs
1701 * @param pPool The pool.
1702 * @param pPage The page.
1703 * @param pShwPT The shadow page table (mapping of the page).
1704 * @param pGstPT The guest page table.
1705 * @param pOldGstPT The old cached guest page table.
1706 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1707 * @param pfFlush Flush reused page table (out)
1708 */
1709DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1710 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1711{
1712 unsigned cChanged = 0;
1713
1714#ifdef VBOX_STRICT
1715 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1716 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1717#endif
1718 *pfFlush = false;
1719
1720 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1721 {
1722 /* Check the new value written by the guest. If present and with a bogus physical address, then
1723 * it's fairly safe to assume the guest is reusing the PT.
1724 */
1725 if ( fAllowRemoval
1726 && pGstPT->a[i].n.u1Present)
1727 {
1728 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1729 {
1730 *pfFlush = true;
1731 return ++cChanged;
1732 }
1733 }
1734 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1735 {
1736 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1737 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1738 {
1739#ifdef VBOX_STRICT
1740 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1741 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1742 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1743#endif
1744 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1745 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1746 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1747 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1748
1749 if ( uHostAttr == uGuestAttr
1750 && fHostRW <= fGuestRW)
1751 continue;
1752 }
1753 cChanged++;
1754 /* Something was changed, so flush it. */
1755 Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1756 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1757 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1758 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1759 }
1760 }
1761 return cChanged;
1762}
1763
1764
1765/**
1766 * Flush a dirty page
1767 *
1768 * @param pVM The cross context VM structure.
1769 * @param pPool The pool.
1770 * @param idxSlot Dirty array slot index
1771 * @param fAllowRemoval Allow a reused page table to be removed
1772 */
1773static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1774{
1775 PPGMPOOLPAGE pPage;
1776 unsigned idxPage;
1777
1778 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1779 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1780 return;
1781
1782 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1783 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1784 pPage = &pPool->aPages[idxPage];
1785 Assert(pPage->idx == idxPage);
1786 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1787
1788 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1789 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1790
1791#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1792 PVMCPU pVCpu = VMMGetCpu(pVM);
1793 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1794#endif
1795
1796 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1797 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1798 Assert(rc == VINF_SUCCESS);
1799 pPage->fDirty = false;
1800
1801#ifdef VBOX_STRICT
1802 uint64_t fFlags = 0;
1803 RTHCPHYS HCPhys;
1804 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1805 AssertMsg( ( rc == VINF_SUCCESS
1806 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1807 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1808 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1809 || rc == VERR_PAGE_NOT_PRESENT,
1810 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1811#endif
1812
1813 /* Flush those PTEs that have changed. */
1814 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1815 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1816 void *pvGst;
1817 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1818 bool fFlush;
1819 unsigned cChanges;
1820
1821 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1822 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1823 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1824 else
1825 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1826 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1827
1828 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1829 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1830 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1831 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1832
1833 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1834 Assert(pPage->cModifications);
1835 if (cChanges < 4)
1836 pPage->cModifications = 1; /* must use > 0 here */
1837 else
1838 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1839
1840 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
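 /* When the array was full, the slot being freed below becomes the next free slot. */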
1841 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1842 pPool->idxFreeDirtyPage = idxSlot;
1843
1844 pPool->cDirtyPages--;
1845 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1846 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1847 if (fFlush)
1848 {
1849 Assert(fAllowRemoval);
1850 Log(("Flush reused page table!\n"));
1851 pgmPoolFlushPage(pPool, pPage);
1852 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1853 }
1854 else
1855 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1856
1857#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1858 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1859#endif
1860}
1861
1862
1863# ifndef IN_RING3
1864/**
1865 * Add a new dirty page
1866 *
1867 * @param pVM The cross context VM structure.
1868 * @param pPool The pool.
1869 * @param pPage The page.
1870 */
1871void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1872{
1873 unsigned idxFree;
1874
1875 PGM_LOCK_ASSERT_OWNER(pVM);
1876 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1877 Assert(!pPage->fDirty);
1878
1879 idxFree = pPool->idxFreeDirtyPage;
1880 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1881 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1882
1883 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1884 {
1885 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1886 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1887 }
1888 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1889 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1890
1891 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1892
1893 /*
1894 * Make a copy of the guest page table as we require valid GCPhys addresses
1895 * when removing references to physical pages.
1896 * (The HCPhys linear lookup is *extremely* expensive!)
1897 */
1898 void *pvGst;
1899 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1900 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1901# ifdef VBOX_STRICT
1902 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1903 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1904 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1905 else
1906 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1907 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1908# endif
1909 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1910
1911 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1912 pPage->fDirty = true;
1913 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1914 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1915 pPool->cDirtyPages++;
1916
1917 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
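 /* If the next ring slot is still occupied, scan the whole array for a free one. */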
1918 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1919 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1920 {
1921 unsigned i;
1922 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1923 {
1924 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1925 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1926 {
1927 pPool->idxFreeDirtyPage = idxFree;
1928 break;
1929 }
1930 }
1931 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1932 }
1933
1934 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1935
1936 /*
1937 * Clear all references to this shadow table. See @bugref{7298}.
1938 */
1939 pgmPoolTrackClearPageUsers(pPool, pPage);
1940}
1941# endif /* !IN_RING3 */
1942
1943
1944/**
1945 * Check if the specified page is dirty (not write monitored)
1946 *
1947 * @return dirty or not
1948 * @param pVM The cross context VM structure.
1949 * @param GCPhys Guest physical address
1950 */
1951bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1952{
1953 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1954 PGM_LOCK_ASSERT_OWNER(pVM);
1955 if (!pPool->cDirtyPages)
1956 return false;
1957
1958 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1959
1960 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1961 {
1962 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1963 {
1964 PPGMPOOLPAGE pPage;
1965 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1966
1967 pPage = &pPool->aPages[idxPage];
1968 if (pPage->GCPhys == GCPhys)
1969 return true;
1970 }
1971 }
1972 return false;
1973}
1974
1975
1976/**
1977 * Reset all dirty pages by reinstating page monitoring.
1978 *
1979 * @param pVM The cross context VM structure.
1980 */
1981void pgmPoolResetDirtyPages(PVM pVM)
1982{
1983 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1984 PGM_LOCK_ASSERT_OWNER(pVM);
1985 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1986
1987 if (!pPool->cDirtyPages)
1988 return;
1989
1990 Log(("pgmPoolResetDirtyPages\n"));
1991 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1992 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1993
1994 pPool->idxFreeDirtyPage = 0;
1995 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1996 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1997 {
1998 unsigned i;
1999 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2000 {
2001 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2002 {
2003 pPool->idxFreeDirtyPage = i;
2004 break;
2005 }
2006 }
2007 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2008 }
2009
2010 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
2011 return;
2012}
2013
2014
2015/**
2016 * Invalidate the PT entry for the specified page
2017 *
2018 * @param pVM The cross context VM structure.
2019 * @param GCPtrPage Guest page to invalidate
2020 */
2021void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
2022{
2023 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2024 PGM_LOCK_ASSERT_OWNER(pVM);
2025 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2026
2027 if (!pPool->cDirtyPages)
2028 return;
2029
2030 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2031 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2032 {
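 /* Note: the loop body is currently empty, so (apart from the logging above) this function is a no-op. */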
2033 }
2034}
2035
2036
2037/**
2038 * Flush any dirty page tracking for the specified page table, reinstating its write monitoring.
2039 *
2040 * @param pVM The cross context VM structure.
2041 * @param GCPhysPT Physical address of the page table
2042 */
2043void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
2044{
2045 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2046 PGM_LOCK_ASSERT_OWNER(pVM);
2047 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2048 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2049
2050 if (!pPool->cDirtyPages)
2051 return;
2052
2053 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2054
2055 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2056 {
2057 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
2058 {
2059 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
2060
2061 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2062 if (pPage->GCPhys == GCPhysPT)
2063 {
2064 idxDirtyPage = i;
2065 break;
2066 }
2067 }
2068 }
2069
2070 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2071 {
2072 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2073 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2074 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
2075 {
2076 unsigned i;
2077 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2078 {
2079 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2080 {
2081 pPool->idxFreeDirtyPage = i;
2082 break;
2083 }
2084 }
2085 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2086 }
2087 }
2088}
2089
2090# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2091
2092/**
2093 * Inserts a page into the GCPhys hash table.
2094 *
2095 * @param pPool The pool.
2096 * @param pPage The page.
2097 */
2098DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2099{
2100 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2101 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2102 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2103 pPage->iNext = pPool->aiHash[iHash];
2104 pPool->aiHash[iHash] = pPage->idx;
2105}
2106
2107
2108/**
2109 * Removes a page from the GCPhys hash table.
2110 *
2111 * @param pPool The pool.
2112 * @param pPage The page.
2113 */
2114DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2115{
2116 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2117 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2118 if (pPool->aiHash[iHash] == pPage->idx)
2119 pPool->aiHash[iHash] = pPage->iNext;
2120 else
2121 {
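 /* Not the head: walk the hash chain to find the predecessor and unlink the page. */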
2122 uint16_t iPrev = pPool->aiHash[iHash];
2123 for (;;)
2124 {
2125 const int16_t i = pPool->aPages[iPrev].iNext;
2126 if (i == pPage->idx)
2127 {
2128 pPool->aPages[iPrev].iNext = pPage->iNext;
2129 break;
2130 }
2131 if (i == NIL_PGMPOOL_IDX)
2132 {
2133 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2134 break;
2135 }
2136 iPrev = i;
2137 }
2138 }
2139 pPage->iNext = NIL_PGMPOOL_IDX;
2140}
2141
2142
2143/**
2144 * Frees up one cache page.
2145 *
2146 * @returns VBox status code.
2147 * @retval VINF_SUCCESS on success.
2148 * @param pPool The pool.
2149 * @param iUser The user index.
2150 */
2151static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2152{
2153#ifndef IN_RC
2154 const PVM pVM = pPool->CTX_SUFF(pVM);
2155#endif
2156 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
2157 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2158
2159 /*
2160 * Select one page from the tail of the age list.
2161 */
2162 PPGMPOOLPAGE pPage;
2163 for (unsigned iLoop = 0; ; iLoop++)
2164 {
2165 uint16_t iToFree = pPool->iAgeTail;
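 /* Never free the caller's own page (iUser); step back one entry in the age list if it comes up. */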
2166 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2167 iToFree = pPool->aPages[iToFree].iAgePrev;
2168/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2169 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2170 {
2171 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2172 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2173 {
2174 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2175 continue;
2176 iToFree = i;
2177 break;
2178 }
2179 }
2180*/
2181 Assert(iToFree != iUser);
2182 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2183 pPage = &pPool->aPages[iToFree];
2184
2185 /*
2186 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2187 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2188 */
2189 if ( !pgmPoolIsPageLocked(pPage)
2190 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2191 break;
2192 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2193 pgmPoolCacheUsed(pPool, pPage);
2194 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2195 }
2196
2197 /*
2198 * Found a usable page, flush it and return.
2199 */
2200 int rc = pgmPoolFlushPage(pPool, pPage);
2201 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2202 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2203 if (rc == VINF_SUCCESS)
2204 PGM_INVL_ALL_VCPU_TLBS(pVM);
2205 return rc;
2206}
2207
2208
2209/**
2210 * Checks if a kind mismatch is really a page being reused
2211 * or if it's just a normal remapping.
2212 *
2213 * @returns true if reused and the cached page (enmKind1) should be flushed
2214 * @returns false if not reused.
2215 * @param enmKind1 The kind of the cached page.
2216 * @param enmKind2 The kind of the requested page.
2217 */
2218static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2219{
2220 switch (enmKind1)
2221 {
2222 /*
2223 * Never reuse them. There is no remapping in non-paging mode.
2224 */
2225 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2226 case PGMPOOLKIND_32BIT_PD_PHYS:
2227 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2228 case PGMPOOLKIND_PAE_PD_PHYS:
2229 case PGMPOOLKIND_PAE_PDPT_PHYS:
2230 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2231 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2232 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2233 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2234 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2235 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2236 return false;
2237
2238 /*
2239 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2240 */
2241 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2242 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2243 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2244 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2245 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2246 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2247 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2248 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2249 case PGMPOOLKIND_32BIT_PD:
2250 case PGMPOOLKIND_PAE_PDPT:
2251 switch (enmKind2)
2252 {
2253 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2254 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2255 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2256 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2257 case PGMPOOLKIND_64BIT_PML4:
2258 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2259 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2260 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2261 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2262 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2263 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2264 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2265 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2266 return true;
2267 default:
2268 return false;
2269 }
2270
2271 /*
2272 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2273 */
2274 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2275 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2276 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2277 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2278 case PGMPOOLKIND_64BIT_PML4:
2279 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2280 switch (enmKind2)
2281 {
2282 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2283 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2284 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2285 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2286 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2287 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2288 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2289 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2290 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2291 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2292 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2293 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2294 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2295 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2296 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2297 return true;
2298 default:
2299 return false;
2300 }
2301
2302 /*
2303 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2304 */
2305 case PGMPOOLKIND_ROOT_NESTED:
2306 return false;
2307
2308 default:
2309 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2310 }
2311}
2312
2313
2314/**
2315 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2316 *
2317 * @returns VBox status code.
2318 * @retval VINF_PGM_CACHED_PAGE on success.
2319 * @retval VERR_FILE_NOT_FOUND if not found.
2320 * @param pPool The pool.
2321 * @param GCPhys The GC physical address of the page we're gonna shadow.
2322 * @param enmKind The kind of mapping.
2323 * @param enmAccess Access type for the mapping (only relevant for big pages)
2324 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2325 * @param iUser The shadow page pool index of the user table. This is
2326 * NIL_PGMPOOL_IDX for root pages.
2327 * @param iUserTable The index into the user table (shadowed). Ignored if
2328 * root page
2329 * @param ppPage Where to store the pointer to the page.
2330 */
2331static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2332 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2333{
2334 /*
2335 * Look up the GCPhys in the hash.
2336 */
2337 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2338 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2339 if (i != NIL_PGMPOOL_IDX)
2340 {
2341 do
2342 {
2343 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2344 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2345 if (pPage->GCPhys == GCPhys)
2346 {
2347 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2348 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2349 && pPage->fA20Enabled == fA20Enabled)
2350 {
2351 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2352 * doesn't flush it in case there are no more free use records.
2353 */
2354 pgmPoolCacheUsed(pPool, pPage);
2355
2356 int rc = VINF_SUCCESS;
2357 if (iUser != NIL_PGMPOOL_IDX)
2358 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2359 if (RT_SUCCESS(rc))
2360 {
2361 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2362 *ppPage = pPage;
2363 if (pPage->cModifications)
2364 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2365 STAM_COUNTER_INC(&pPool->StatCacheHits);
2366 return VINF_PGM_CACHED_PAGE;
2367 }
2368 return rc;
2369 }
2370
2371 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2372 {
2373 /*
2374 * The kind is different. In some cases we should now flush the page
2375 * as it has been reused, but in most cases this is normal remapping
2376 * of PDs as PT or big pages using the GCPhys field in a slightly
2377 * different way than the other kinds.
2378 */
2379 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2380 {
2381 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2382 pgmPoolFlushPage(pPool, pPage);
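 /* Treat this as a cache miss so the caller allocates a fresh page. */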
2383 break;
2384 }
2385 }
2386 }
2387
2388 /* next */
2389 i = pPage->iNext;
2390 } while (i != NIL_PGMPOOL_IDX);
2391 }
2392
2393 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2394 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2395 return VERR_FILE_NOT_FOUND;
2396}
2397
2398
2399/**
2400 * Inserts a page into the cache.
2401 *
2402 * @param pPool The pool.
2403 * @param pPage The cached page.
2404 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2405 */
2406static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2407{
2408 /*
2409 * Insert into the GCPhys hash if the page is fit for that.
2410 */
2411 Assert(!pPage->fCached);
2412 if (fCanBeCached)
2413 {
2414 pPage->fCached = true;
2415 pgmPoolHashInsert(pPool, pPage);
2416 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2417 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2418 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2419 }
2420 else
2421 {
2422 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2423 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2424 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2425 }
2426
2427 /*
2428 * Insert at the head of the age list.
2429 */
2430 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2431 pPage->iAgeNext = pPool->iAgeHead;
2432 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2433 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2434 else
2435 pPool->iAgeTail = pPage->idx;
2436 pPool->iAgeHead = pPage->idx;
2437}
2438
2439
2440/**
2441 * Flushes a cached page.
2442 *
2443 * @param pPool The pool.
2444 * @param pPage The cached page.
2445 */
2446static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2447{
2448 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2449
2450 /*
2451 * Remove the page from the hash.
2452 */
2453 if (pPage->fCached)
2454 {
2455 pPage->fCached = false;
2456 pgmPoolHashRemove(pPool, pPage);
2457 }
2458 else
2459 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2460
2461 /*
2462 * Remove it from the age list.
2463 */
2464 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2465 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2466 else
2467 pPool->iAgeTail = pPage->iAgePrev;
2468 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2469 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2470 else
2471 pPool->iAgeHead = pPage->iAgeNext;
2472 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2473 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2474}
2475
2476
2477/**
2478 * Looks for pages sharing the monitor.
2479 *
2480 * @returns Pointer to the head page.
2481 * @returns NULL if not found.
2482 * @param pPool The pool.
2483 * @param pNewPage The page which is going to be monitored.
2484 */
2485static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2486{
2487 /*
2488 * Look up the GCPhys in the hash.
2489 */
2490 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2491 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2492 if (i == NIL_PGMPOOL_IDX)
2493 return NULL;
2494 do
2495 {
2496 PPGMPOOLPAGE pPage = &pPool->aPages[i];
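 /* Range check rather than exact compare: pPage->GCPhys may include a sub-page offset for certain page kinds. */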
2497 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2498 && pPage != pNewPage)
2499 {
2500 switch (pPage->enmKind)
2501 {
2502 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2503 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2504 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2505 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2506 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2507 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2508 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2509 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2510 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2511 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2512 case PGMPOOLKIND_64BIT_PML4:
2513 case PGMPOOLKIND_32BIT_PD:
2514 case PGMPOOLKIND_PAE_PDPT:
2515 {
2516 /* find the head */
2517 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2518 {
2519 Assert(pPage->iMonitoredPrev != pPage->idx);
2520 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2521 }
2522 return pPage;
2523 }
2524
2525 /* ignore, no monitoring. */
2526 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2527 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2528 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2529 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2530 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2531 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2532 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2533 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2534 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2535 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2536 case PGMPOOLKIND_ROOT_NESTED:
2537 case PGMPOOLKIND_PAE_PD_PHYS:
2538 case PGMPOOLKIND_PAE_PDPT_PHYS:
2539 case PGMPOOLKIND_32BIT_PD_PHYS:
2540 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2541 break;
2542 default:
2543 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2544 }
2545 }
2546
2547 /* next */
2548 i = pPage->iNext;
2549 } while (i != NIL_PGMPOOL_IDX);
2550 return NULL;
2551}
2552
2553
2554/**
2555 * Enables write monitoring of a guest page.
2556 *
2557 * @returns VBox status code.
2558 * @retval VINF_SUCCESS on success.
2559 * @param pPool The pool.
2560 * @param pPage The cached page.
2561 */
2562static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2563{
2564 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2565
2566 /*
2567 * Filter out the relevant kinds.
2568 */
2569 switch (pPage->enmKind)
2570 {
2571 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2572 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2573 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2574 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2575 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2576 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2577 case PGMPOOLKIND_64BIT_PML4:
2578 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2579 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2580 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2581 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2582 case PGMPOOLKIND_32BIT_PD:
2583 case PGMPOOLKIND_PAE_PDPT:
2584 break;
2585
2586 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2587 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2588 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2589 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2590 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2591 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2592 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2593 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2594 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2595 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2596 case PGMPOOLKIND_ROOT_NESTED:
2597 /* Nothing to monitor here. */
2598 return VINF_SUCCESS;
2599
2600 case PGMPOOLKIND_32BIT_PD_PHYS:
2601 case PGMPOOLKIND_PAE_PDPT_PHYS:
2602 case PGMPOOLKIND_PAE_PD_PHYS:
2603 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2604 /* Nothing to monitor here. */
2605 return VINF_SUCCESS;
2606 default:
2607 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2608 }
2609
2610 /*
2611 * Install handler.
2612 */
2613 int rc;
2614 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2615 if (pPageHead)
2616 {
2617 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2618 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2619
2620#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2621 if (pPageHead->fDirty)
2622 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2623#endif
2624
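 /* Link this page in right after the chain head; the physical access handler remains registered for the head page only. */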
2625 pPage->iMonitoredPrev = pPageHead->idx;
2626 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2627 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2628 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2629 pPageHead->iMonitoredNext = pPage->idx;
2630 rc = VINF_SUCCESS;
2631 }
2632 else
2633 {
2634 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2635 PVM pVM = pPool->CTX_SUFF(pVM);
2636 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2637 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2638 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2639 NIL_RTR3PTR /*pszDesc*/);
2640 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2641 * the heap size should suffice. */
2642 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2643 PVMCPU pVCpu = VMMGetCpu(pVM);
2644 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2645 }
2646 pPage->fMonitored = true;
2647 return rc;
2648}
2649
2650
2651/**
2652 * Disables write monitoring of a guest page.
2653 *
2654 * @returns VBox status code.
2655 * @retval VINF_SUCCESS on success.
2656 * @param pPool The pool.
2657 * @param pPage The cached page.
2658 */
2659static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2660{
2661 /*
2662 * Filter out the relevant kinds.
2663 */
2664 switch (pPage->enmKind)
2665 {
2666 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2667 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2668 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2669 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2670 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2671 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2672 case PGMPOOLKIND_64BIT_PML4:
2673 case PGMPOOLKIND_32BIT_PD:
2674 case PGMPOOLKIND_PAE_PDPT:
2675 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2676 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2677 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2678 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2679 break;
2680
2681 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2682 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2683 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2684 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2685 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2686 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2687 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2688 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2689 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2690 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2691 case PGMPOOLKIND_ROOT_NESTED:
2692 case PGMPOOLKIND_PAE_PD_PHYS:
2693 case PGMPOOLKIND_PAE_PDPT_PHYS:
2694 case PGMPOOLKIND_32BIT_PD_PHYS:
2695 /* Nothing to monitor here. */
2696 Assert(!pPage->fMonitored);
2697 return VINF_SUCCESS;
2698
2699 default:
2700 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2701 }
2702 Assert(pPage->fMonitored);
2703
2704 /*
2705 * Remove the page from the monitored list or uninstall it if last.
2706 */
2707 const PVM pVM = pPool->CTX_SUFF(pVM);
2708 int rc;
2709 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2710 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2711 {
2712 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2713 {
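 /* This page is the head of the monitoring chain: promote the next page to head and re-point the handler's user arguments at it. */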
2714 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2715 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2716 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, MMHyperCCToR3(pVM, pNewHead),
2717 MMHyperCCToR0(pVM, pNewHead), MMHyperCCToRC(pVM, pNewHead));
2718
2719 AssertFatalRCSuccess(rc);
2720 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2721 }
2722 else
2723 {
2724 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2725 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2726 {
2727 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2728 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2729 }
2730 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2731 rc = VINF_SUCCESS;
2732 }
2733 }
2734 else
2735 {
2736 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2737 AssertFatalRC(rc);
2738 PVMCPU pVCpu = VMMGetCpu(pVM);
2739 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2740 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2741 }
2742 pPage->fMonitored = false;
2743
2744 /*
2745 * Remove it from the list of modified pages (if in it).
2746 */
2747 pgmPoolMonitorModifiedRemove(pPool, pPage);
2748
2749 return rc;
2750}
2751
2752
2753/**
2754 * Inserts the page into the list of modified pages.
2755 *
2756 * @param pPool The pool.
2757 * @param pPage The page.
2758 */
2759void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2760{
2761 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2762 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2763 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2764 && pPool->iModifiedHead != pPage->idx,
2765 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2766 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2767 pPool->iModifiedHead, pPool->cModifiedPages));
2768
2769 pPage->iModifiedNext = pPool->iModifiedHead;
2770 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2771 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2772 pPool->iModifiedHead = pPage->idx;
2773 pPool->cModifiedPages++;
2774#ifdef VBOX_WITH_STATISTICS
2775 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2776 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2777#endif
2778}
2779
2780
2781/**
2782 * Removes the page from the list of modified pages and resets the
2783 * modification counter.
2784 *
2785 * @param pPool The pool.
2786 * @param pPage The page which is believed to be in the list of modified pages.
2787 */
2788static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2789{
2790 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2791 if (pPool->iModifiedHead == pPage->idx)
2792 {
2793 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2794 pPool->iModifiedHead = pPage->iModifiedNext;
2795 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2796 {
2797 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2798 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2799 }
2800 pPool->cModifiedPages--;
2801 }
2802 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2803 {
2804 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2805 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2806 {
2807 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2808 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2809 }
2810 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2811 pPool->cModifiedPages--;
2812 }
2813 else
2814 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2815 pPage->cModifications = 0;
2816}
2817
2818
2819/**
2820 * Zaps the list of modified pages, resetting their modification counters in the process.
2821 *
2822 * @param pVM The cross context VM structure.
2823 */
2824static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2825{
2826 pgmLock(pVM);
2827 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2828 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2829
2830 unsigned cPages = 0; NOREF(cPages);
2831
2832#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2833 pgmPoolResetDirtyPages(pVM);
2834#endif
2835
2836 uint16_t idx = pPool->iModifiedHead;
2837 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2838 while (idx != NIL_PGMPOOL_IDX)
2839 {
2840 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2841 idx = pPage->iModifiedNext;
2842 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2843 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2844 pPage->cModifications = 0;
2845 Assert(++cPages);
2846 }
2847 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2848 pPool->cModifiedPages = 0;
2849 pgmUnlock(pVM);
2850}
2851
2852
2853/**
2854 * Handle SyncCR3 pool tasks
2855 *
2856 * @returns VBox status code.
2857 * @retval VINF_SUCCESS if successfully added.
2858 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2859 * @param pVCpu The cross context virtual CPU structure.
2860 * @remark Should only be used when monitoring is available, thus placed in
2861 * the PGMPOOL_WITH_MONITORING \#ifdef.
2862 */
2863int pgmPoolSyncCR3(PVMCPU pVCpu)
2864{
2865 PVM pVM = pVCpu->CTX_SUFF(pVM);
2866 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2867
2868 /*
2869 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2870 * Occasionally we will have to clear all the shadow page tables because we wanted
2871 * to monitor a page which was mapped by too many shadowed page tables. This operation
2872 * is sometimes referred to as a 'lightweight flush'.
2873 */
2874# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2875 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2876 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2877# else /* !IN_RING3 */
2878 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2879 {
2880 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2881 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2882
2883 /* Make sure all other VCPUs return to ring 3. */
2884 if (pVM->cCpus > 1)
2885 {
2886 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2887 PGM_INVL_ALL_VCPU_TLBS(pVM);
2888 }
2889 return VINF_PGM_SYNC_CR3;
2890 }
2891# endif /* !IN_RING3 */
2892 else
2893 {
2894 pgmPoolMonitorModifiedClearAll(pVM);
2895
2896 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2897 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2898 {
2899 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2900 return pgmPoolSyncCR3(pVCpu);
2901 }
2902 }
2903 return VINF_SUCCESS;
2904}
2905
2906
2907/**
2908 * Frees up at least one user entry.
2909 *
2910 * @returns VBox status code.
2911 * @retval VINF_SUCCESS if successfully added.
2912 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2913 * @param pPool The pool.
2914 * @param iUser The user index.
2915 */
2916static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2917{
2918 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2919 /*
2920 * Just free cached pages in a braindead fashion.
2921 */
2922 /** @todo walk the age list backwards and free the first with usage. */
2923 int rc = VINF_SUCCESS;
2924 do
2925 {
2926 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2927 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2928 rc = rc2;
2929 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2930 return rc;
2931}
2932
2933
2934/**
2935 * Inserts a page into the cache.
2936 *
2937 * This will create a user node for the page, insert it into the GCPhys
2938 * hash, and insert it into the age list.
2939 *
2940 * @returns VBox status code.
2941 * @retval VINF_SUCCESS if successfully added.
2942 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2943 * @param pPool The pool.
2944 * @param pPage The cached page.
2945 * @param GCPhys The GC physical address of the page we're gonna shadow.
2946 * @param iUser The user index.
2947 * @param iUserTable The user table index.
2948 */
2949DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2950{
2951 int rc = VINF_SUCCESS;
2952 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2953
2954 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2955
2956 if (iUser != NIL_PGMPOOL_IDX)
2957 {
2958#ifdef VBOX_STRICT
2959 /*
2960 * Check that the entry doesn't already exist.
2961 */
2962 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2963 {
2964 uint16_t i = pPage->iUserHead;
2965 do
2966 {
2967 Assert(i < pPool->cMaxUsers);
2968 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2969 i = paUsers[i].iNext;
2970 } while (i != NIL_PGMPOOL_USER_INDEX);
2971 }
2972#endif
2973
2974 /*
2975 * Find a free user node.
2976 */
2977 uint16_t i = pPool->iUserFreeHead;
2978 if (i == NIL_PGMPOOL_USER_INDEX)
2979 {
2980 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2981 if (RT_FAILURE(rc))
2982 return rc;
2983 i = pPool->iUserFreeHead;
2984 }
2985
2986 /*
2987 * Unlink the user node from the free list,
2988 * initialize and insert it into the user list.
2989 */
2990 pPool->iUserFreeHead = paUsers[i].iNext;
2991 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2992 paUsers[i].iUser = iUser;
2993 paUsers[i].iUserTable = iUserTable;
2994 pPage->iUserHead = i;
2995 }
2996 else
2997 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2998
2999
3000 /*
3001 * Insert into cache and enable monitoring of the guest page if enabled.
3002 *
3003 * Until we implement caching of all levels, including the CR3 one, we'll
3004 * have to make sure we don't try monitor & cache any recursive reuse of
3005 * a monitored CR3 page. Because all windows versions are doing this we'll
3006 * have to be able to do combined access monitoring, CR3 + PT and
3007 * PD + PT (guest PAE).
3008 *
3009 * Update:
3010 * We're now cooperating with the CR3 monitor if an uncachable page is found.
3011 */
3012 const bool fCanBeMonitored = true;
3013 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
3014 if (fCanBeMonitored)
3015 {
3016 rc = pgmPoolMonitorInsert(pPool, pPage);
3017 AssertRC(rc);
3018 }
3019 return rc;
3020}
3021
3022
3023/**
3024 * Adds a user reference to a page.
3025 *
3026 * This will move the page to the head of the age list.
3027 *
3028 * @returns VBox status code.
3029 * @retval VINF_SUCCESS if successfully added.
3030 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3031 * @param pPool The pool.
3032 * @param pPage The cached page.
3033 * @param iUser The user index.
3034 * @param iUserTable The user table.
3035 */
3036static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3037{
3038 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3039 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3040 Assert(iUser != NIL_PGMPOOL_IDX);
3041
3042# ifdef VBOX_STRICT
3043 /*
3044 * Check that the entry doesn't already exist. We only allow multiple
3045 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3046 */
3047 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3048 {
3049 uint16_t i = pPage->iUserHead;
3050 do
3051 {
3052 Assert(i < pPool->cMaxUsers);
3053 /** @todo this assertion looks odd... Shouldn't it be && here? */
3054 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3055 i = paUsers[i].iNext;
3056 } while (i != NIL_PGMPOOL_USER_INDEX);
3057 }
3058# endif
3059
3060 /*
3061 * Allocate a user node.
3062 */
3063 uint16_t i = pPool->iUserFreeHead;
3064 if (i == NIL_PGMPOOL_USER_INDEX)
3065 {
3066 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3067 if (RT_FAILURE(rc))
3068 return rc;
3069 i = pPool->iUserFreeHead;
3070 }
3071 pPool->iUserFreeHead = paUsers[i].iNext;
3072
3073 /*
3074 * Initialize the user node and insert it.
3075 */
3076 paUsers[i].iNext = pPage->iUserHead;
3077 paUsers[i].iUser = iUser;
3078 paUsers[i].iUserTable = iUserTable;
3079 pPage->iUserHead = i;
3080
3081# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3082 if (pPage->fDirty)
3083 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3084# endif
3085
3086 /*
3087 * Tell the cache to update its replacement stats for this page.
3088 */
3089 pgmPoolCacheUsed(pPool, pPage);
3090 return VINF_SUCCESS;
3091}
3092
3093
3094/**
3095 * Frees a user record associated with a page.
3096 *
3097 * This does not clear the entry in the user table, it simply returns the
3098 * user record to the chain of free records.
3099 *
3100 * @param pPool The pool.
3101 * @param pPage The shadow page.
3102 * @param iUser The shadow page pool index of the user table.
3103 * @param iUserTable The index into the user table (shadowed).
3104 *
3105 * @remarks Don't call this for root pages.
3106 */
3107static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3108{
3109 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3110 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3111 Assert(iUser != NIL_PGMPOOL_IDX);
3112
3113 /*
3114 * Unlink and free the specified user entry.
3115 */
3116
3117 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3118 uint16_t i = pPage->iUserHead;
3119 if ( i != NIL_PGMPOOL_USER_INDEX
3120 && paUsers[i].iUser == iUser
3121 && paUsers[i].iUserTable == iUserTable)
3122 {
3123 pPage->iUserHead = paUsers[i].iNext;
3124
3125 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3126 paUsers[i].iNext = pPool->iUserFreeHead;
3127 pPool->iUserFreeHead = i;
3128 return;
3129 }
3130
3131 /* General: Linear search. */
3132 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3133 while (i != NIL_PGMPOOL_USER_INDEX)
3134 {
3135 if ( paUsers[i].iUser == iUser
3136 && paUsers[i].iUserTable == iUserTable)
3137 {
3138 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3139 paUsers[iPrev].iNext = paUsers[i].iNext;
3140 else
3141 pPage->iUserHead = paUsers[i].iNext;
3142
3143 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3144 paUsers[i].iNext = pPool->iUserFreeHead;
3145 pPool->iUserFreeHead = i;
3146 return;
3147 }
3148 iPrev = i;
3149 i = paUsers[i].iNext;
3150 }
3151
3152 /* Fatal: didn't find it */
3153 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3154 iUser, iUserTable, pPage->GCPhys));
3155}
3156
3157
3158#if 0 /* unused */
3159/**
3160 * Gets the entry size of a shadow table.
3161 *
3162 * @param enmKind The kind of page.
3163 *
3164 * @returns The size of the entry in bytes. That is, 4 or 8.
3165 * @returns If the kind is not for a table, an assertion is raised and 0 is
3166 * returned.
3167 */
3168DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3169{
3170 switch (enmKind)
3171 {
3172 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3173 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3174 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3175 case PGMPOOLKIND_32BIT_PD:
3176 case PGMPOOLKIND_32BIT_PD_PHYS:
3177 return 4;
3178
3179 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3180 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3181 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3182 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3183 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3184 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3185 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3186 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3187 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3188 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3189 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3190 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3191 case PGMPOOLKIND_64BIT_PML4:
3192 case PGMPOOLKIND_PAE_PDPT:
3193 case PGMPOOLKIND_ROOT_NESTED:
3194 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3195 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3196 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3197 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3198 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3199 case PGMPOOLKIND_PAE_PD_PHYS:
3200 case PGMPOOLKIND_PAE_PDPT_PHYS:
3201 return 8;
3202
3203 default:
3204 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3205 }
3206}
3207#endif /* unused */
3208
3209#if 0 /* unused */
3210/**
3211 * Gets the entry size of a guest table.
3212 *
3213 * @param enmKind The kind of page.
3214 *
3215 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3216 * @returns If the kind is not for a table, an assertion is raised and 0 is
3217 * returned.
3218 */
3219DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3220{
3221 switch (enmKind)
3222 {
3223 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3224 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3225 case PGMPOOLKIND_32BIT_PD:
3226 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3227 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3228 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3229 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3230 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3231 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3232 return 4;
3233
3234 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3235 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3236 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3237 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3238 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3239 case PGMPOOLKIND_64BIT_PML4:
3240 case PGMPOOLKIND_PAE_PDPT:
3241 return 8;
3242
3243 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3244 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3245 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3246 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3247 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3248 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3249 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3250 case PGMPOOLKIND_ROOT_NESTED:
3251 case PGMPOOLKIND_PAE_PD_PHYS:
3252 case PGMPOOLKIND_PAE_PDPT_PHYS:
3253 case PGMPOOLKIND_32BIT_PD_PHYS:
3254 /** @todo can we return 0? (nobody is calling this...) */
3255 AssertFailed();
3256 return 0;
3257
3258 default:
3259 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3260 }
3261}
3262#endif /* unused */
3263
3264
3265/**
3266 * Checks one shadow page table entry for a mapping of a physical page.
3267 *
3268 * @returns true if the PTE(s) were kept (merely updated), false if all relevant PTEs were removed.
3269 *
3270 * @param pVM The cross context VM structure.
3271 * @param pPhysPage The guest page in question.
3272 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3273 * @param iShw The shadow page table.
3274 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3275 */
3276static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3277{
3278 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3279 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3280 bool fRet = false;
3281
3282 /*
3283 * Assert sanity.
3284 */
3285 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3286 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3287 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3288
3289 /*
3290 * Then, clear the actual mappings to the page in the shadow PT.
3291 */
3292 switch (pPage->enmKind)
3293 {
3294 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3295 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3296 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3297 {
3298 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3299 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3300 uint32_t u32AndMask = 0;
3301 uint32_t u32OrMask = 0;
3302
3303 if (!fFlushPTEs)
3304 {
3305 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3306 {
3307 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3308 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3309 u32OrMask = X86_PTE_RW;
3310 u32AndMask = UINT32_MAX;
3311 fRet = true;
3312 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3313 break;
3314
3315 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3316 u32OrMask = 0;
3317 u32AndMask = ~X86_PTE_RW;
3318 fRet = true;
3319 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3320 break;
3321 default:
3322 /* (shouldn't be here, will assert below) */
3323 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3324 break;
3325 }
3326 }
3327 else
3328 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3329
3330 /* Update the counter if we're removing references. */
3331 if (!u32AndMask)
3332 {
3333 Assert(pPage->cPresent);
3334 Assert(pPool->cPresent);
3335 pPage->cPresent--;
3336 pPool->cPresent--;
3337 }
3338
3339 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3340 {
3341 X86PTE Pte;
3342
3343 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3344 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3345 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3346 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3347
3348 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3349 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3350 return fRet;
3351 }
3352#ifdef LOG_ENABLED
3353 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3354 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3355 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3356 {
3357 Log(("i=%d cFound=%d\n", i, ++cFound));
3358 }
3359#endif
3360 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3361 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3362 break;
3363 }
3364
3365 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3366 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3367 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3368 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3369 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3370 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3371 {
3372 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3373 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3374 uint64_t u64OrMask = 0;
3375 uint64_t u64AndMask = 0;
3376
3377 if (!fFlushPTEs)
3378 {
3379 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3380 {
3381 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3382 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3383 u64OrMask = X86_PTE_RW;
3384 u64AndMask = UINT64_MAX;
3385 fRet = true;
3386 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3387 break;
3388
3389 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3390 u64OrMask = 0;
3391 u64AndMask = ~(uint64_t)X86_PTE_RW;
3392 fRet = true;
3393 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3394 break;
3395
3396 default:
3397 /* (shouldn't be here, will assert below) */
3398 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3399 break;
3400 }
3401 }
3402 else
3403 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3404
3405 /* Update the counter if we're removing references. */
3406 if (!u64AndMask)
3407 {
3408 Assert(pPage->cPresent);
3409 Assert(pPool->cPresent);
3410 pPage->cPresent--;
3411 pPool->cPresent--;
3412 }
3413
3414 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3415 {
3416 X86PTEPAE Pte;
3417
3418 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3419 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3420 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3421 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3422
3423 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3424 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3425 return fRet;
3426 }
3427#ifdef LOG_ENABLED
3428 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3429 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3430 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3431 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3432 Log(("i=%d cFound=%d\n", i, ++cFound));
3433#endif
3434 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3435 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3436 break;
3437 }
3438
3439#ifdef PGM_WITH_LARGE_PAGES
3440 /* Large page case only. */
3441 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3442 {
3443 Assert(pVM->pgm.s.fNestedPaging);
3444
3445 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3446 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3447
3448 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3449 {
3450 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3451 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3452 pPD->a[iPte].u = 0;
3453 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3454
3455 /* Update the counter as we're removing references. */
3456 Assert(pPage->cPresent);
3457 Assert(pPool->cPresent);
3458 pPage->cPresent--;
3459 pPool->cPresent--;
3460
3461 return fRet;
3462 }
3463# ifdef LOG_ENABLED
3464 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3465 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3466 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3467 Log(("i=%d cFound=%d\n", i, ++cFound));
3468# endif
3469 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3470 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3471 break;
3472 }
3473
3474 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3475 case PGMPOOLKIND_PAE_PD_PHYS:
3476 {
3477 Assert(pVM->pgm.s.fNestedPaging);
3478
3479 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3480 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3481
3482 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3483 {
3484 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3485 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3486 pPD->a[iPte].u = 0;
3487 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3488
3489 /* Update the counter as we're removing references. */
3490 Assert(pPage->cPresent);
3491 Assert(pPool->cPresent);
3492 pPage->cPresent--;
3493 pPool->cPresent--;
3494 return fRet;
3495 }
3496# ifdef LOG_ENABLED
3497 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3498 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3499 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3500 Log(("i=%d cFound=%d\n", i, ++cFound));
3501# endif
3502 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3503 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3504 break;
3505 }
3506#endif /* PGM_WITH_LARGE_PAGES */
3507
3508 default:
3509 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3510 }
3511
3512 /* not reached. */
3513#ifndef _MSC_VER
3514 return fRet;
3515#endif
3516}
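
#if 0 /* Illustrative sketch (not built): the AND/OR mask pattern used above to update a 32-bit
         PTE in place instead of clearing it. The constant below is a simplified stand-in, not
         the real X86_PTE_RW definition. */
# define SKETCH_PTE_RW  UINT32_C(0x02)  /* writable bit */

/** Returns the updated PTE value: RW is granted when no handler monitors the page,
 *  and revoked when a write handler does. */
static uint32_t sketchUpdatePte(uint32_t uPte, bool fWriteHandler)
{
    uint32_t const fAndMask = fWriteHandler ? ~SKETCH_PTE_RW : UINT32_MAX;
    uint32_t const fOrMask  = fWriteHandler ? 0              : SKETCH_PTE_RW;
    return (uPte & fAndMask) | fOrMask;
}
#endif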
3517
3518
3519/**
3520 * Scans one shadow page table for mappings of a physical page.
3521 *
3522 * @param pVM The cross context VM structure.
3523 * @param pPhysPage The guest page in question.
3524 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3525 * @param iShw The shadow page table.
3526 */
3527static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3528{
3529 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3530
3531 /* We should only come here when there's only one reference to this physical page. */
3532 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3533
3534 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3535 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3536 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3537 if (!fKeptPTEs)
3538 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3539 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3540}
3541
3542
3543/**
3544 * Flushes a list of shadow page tables mapping the same physical page.
3545 *
3546 * @param pVM The cross context VM structure.
3547 * @param pPhysPage The guest page in question.
3548 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3549 * @param iPhysExt The physical cross reference extent list to flush.
3550 */
3551static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3552{
3553 PGM_LOCK_ASSERT_OWNER(pVM);
3554 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3555 bool fKeepList = false;
3556
3557 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3558 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3559
3560 const uint16_t iPhysExtStart = iPhysExt;
3561 PPGMPOOLPHYSEXT pPhysExt;
3562 do
3563 {
3564 Assert(iPhysExt < pPool->cMaxPhysExts);
3565 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3566 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3567 {
3568 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3569 {
3570 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3571 if (!fKeptPTEs)
3572 {
3573 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3574 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3575 }
3576 else
3577 fKeepList = true;
3578 }
3579 }
3580 /* next */
3581 iPhysExt = pPhysExt->iNext;
3582 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3583
3584 if (!fKeepList)
3585 {
3586 /* insert the list into the free list and clear the ram range entry. */
3587 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3588 pPool->iPhysExtFreeHead = iPhysExtStart;
3589 /* Invalidate the tracking data. */
3590 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3591 }
3592
3593 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3594}
3595
3596
3597/**
3598 * Flushes all shadow page table mappings of the given guest page.
3599 *
3600 * This is typically called when the host page backing the guest one has been
3601 * replaced or when the page protection was changed due to a guest access
3602 * caught by the monitoring.
3603 *
3604 * @returns VBox status code.
3605 * @retval VINF_SUCCESS if all references have been successfully cleared.
3606 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3607 * pool cleaning. FF and sync flags are set.
3608 *
3609 * @param pVM The cross context VM structure.
3610 * @param GCPhysPage GC physical address of the page in question
3611 * @param pPhysPage The guest page in question.
3612 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3613 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3614 * flushed; it is NOT touched if this isn't necessary.
3615 * The caller MUST initialize this to @a false.
3616 */
3617int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3618{
3619 PVMCPU pVCpu = VMMGetCpu(pVM);
3620 pgmLock(pVM);
3621 int rc = VINF_SUCCESS;
3622
3623#ifdef PGM_WITH_LARGE_PAGES
3624 /* Is this page part of a large page? */
3625 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3626 {
3627 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3628 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3629
3630 /* Fetch the large page base. */
3631 PPGMPAGE pLargePage;
3632 if (GCPhysBase != GCPhysPage)
3633 {
3634 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3635 AssertFatal(pLargePage);
3636 }
3637 else
3638 pLargePage = pPhysPage;
3639
3640 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3641
3642 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3643 {
3644 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3645 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3646 pVM->pgm.s.cLargePagesDisabled++;
3647
3648 /* Update the base as *only* that one has a reference and there's only one PDE to clear. */
3649 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3650
3651 *pfFlushTLBs = true;
3652 pgmUnlock(pVM);
3653 return rc;
3654 }
3655 }
3656#else
3657 NOREF(GCPhysPage);
3658#endif /* PGM_WITH_LARGE_PAGES */
3659
3660 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3661 if (u16)
3662 {
3663 /*
3664 * The zero page is currently screwing up the tracking and we'll
3665 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3666 * is defined, zero pages won't normally be mapped. Some kind of solution
3667 * will be needed for this problem of course, but it will have to wait...
3668 */
3669 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3670 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3671 rc = VINF_PGM_GCPHYS_ALIASED;
3672 else
3673 {
3674# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3675 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3676 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3677 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3678# endif
3679
3680 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3681 {
3682 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3683 pgmPoolTrackFlushGCPhysPT(pVM,
3684 pPhysPage,
3685 fFlushPTEs,
3686 PGMPOOL_TD_GET_IDX(u16));
3687 }
3688 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3689 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3690 else
3691 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3692 *pfFlushTLBs = true;
3693
3694# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3695 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3696# endif
3697 }
3698 }
3699
3700 if (rc == VINF_PGM_GCPHYS_ALIASED)
3701 {
3702 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3703 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3704 rc = VINF_PGM_SYNC_CR3;
3705 }
3706 pgmUnlock(pVM);
3707 return rc;
3708}
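
#if 0 /* Illustrative sketch (not built): how a 16-bit tracking word can pack a reference count
         and a pool/extent index, as the PGMPOOL_TD_MAKE / PGMPOOL_TD_GET_CREFS /
         PGMPOOL_TD_GET_IDX macros used above do. The 4/12 bit split below is hypothetical;
         the real macros define their own layout. */
# define SKETCH_TD_IDX_BITS     12
# define SKETCH_TD_IDX_MASK     ((UINT16_C(1) << SKETCH_TD_IDX_BITS) - 1)

/** Packs a reference count and an index into one 16-bit word. */
static uint16_t sketchTdMake(uint16_t cRefs, uint16_t idx)
{
    return (uint16_t)((cRefs << SKETCH_TD_IDX_BITS) | (idx & SKETCH_TD_IDX_MASK));
}

/** Extracts the reference count. */
static uint16_t sketchTdGetCRefs(uint16_t u16)
{
    return (uint16_t)(u16 >> SKETCH_TD_IDX_BITS);
}

/** Extracts the index. */
static uint16_t sketchTdGetIdx(uint16_t u16)
{
    return (uint16_t)(u16 & SKETCH_TD_IDX_MASK);
}
#endif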
3709
3710
3711/**
3712 * Scans all shadow page tables for mappings of a physical page.
3713 *
3714 * This may be slow, but it's most likely more efficient than cleaning
3715 * out the entire page pool / cache.
3716 *
3717 * @returns VBox status code.
3718 * @retval VINF_SUCCESS if all references have been successfully cleared.
3719 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3720 * a page pool cleaning.
3721 *
3722 * @param pVM The cross context VM structure.
3723 * @param pPhysPage The guest page in question.
3724 */
3725int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3726{
3727 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3728 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3729 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3730 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3731
3732 /*
3733 * There is a limit to what makes sense.
3734 */
3735 if ( pPool->cPresent > 1024
3736 && pVM->cCpus == 1)
3737 {
3738 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3739 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3740 return VINF_PGM_GCPHYS_ALIASED;
3741 }
3742
3743 /*
3744 * Iterate all the pages until we've encountered all those in use.
3745 * This is a simple but not quite optimal solution.
3746 */
3747 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3748 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3749 unsigned cLeft = pPool->cUsedPages;
3750 unsigned iPage = pPool->cCurPages;
3751 while (--iPage >= PGMPOOL_IDX_FIRST)
3752 {
3753 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3754 if ( pPage->GCPhys != NIL_RTGCPHYS
3755 && pPage->cPresent)
3756 {
3757 switch (pPage->enmKind)
3758 {
3759 /*
3760 * We only care about shadow page tables.
3761 */
3762 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3763 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3764 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3765 {
3766 unsigned cPresent = pPage->cPresent;
3767 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3768 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3769 if (pPT->a[i].n.u1Present)
3770 {
3771 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3772 {
3773 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3774 pPT->a[i].u = 0;
3775
3776 /* Update the counter as we're removing references. */
3777 Assert(pPage->cPresent);
3778 Assert(pPool->cPresent);
3779 pPage->cPresent--;
3780 pPool->cPresent--;
3781 }
3782 if (!--cPresent)
3783 break;
3784 }
3785 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3786 break;
3787 }
3788
3789 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3790 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3791 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3792 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3793 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3794 {
3795 unsigned cPresent = pPage->cPresent;
3796 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3797 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3798 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3799 {
3800 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3801 {
3802 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3803 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3804
3805 /* Update the counter as we're removing references. */
3806 Assert(pPage->cPresent);
3807 Assert(pPool->cPresent);
3808 pPage->cPresent--;
3809 pPool->cPresent--;
3810 }
3811 if (!--cPresent)
3812 break;
3813 }
3814 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3815 break;
3816 }
3817#ifndef IN_RC
3818 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3819 {
3820 unsigned cPresent = pPage->cPresent;
3821 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3822 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3823 if (pPT->a[i].n.u1Present)
3824 {
3825 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3826 {
3827 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3828 pPT->a[i].u = 0;
3829
3830 /* Update the counter as we're removing references. */
3831 Assert(pPage->cPresent);
3832 Assert(pPool->cPresent);
3833 pPage->cPresent--;
3834 pPool->cPresent--;
3835 }
3836 if (!--cPresent)
3837 break;
3838 }
3839 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3840 break;
3841 }
3842#endif
3843 }
3844 if (!--cLeft)
3845 break;
3846 }
3847 }
3848
3849 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3850 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3851
3852 /*
3853 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3854 */
3855 if (pPool->cPresent > 1024)
3856 {
3857 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3858 return VINF_PGM_GCPHYS_ALIASED;
3859 }
3860
3861 return VINF_SUCCESS;
3862}
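
#if 0 /* Illustrative sketch (not built): the countdown pattern used by the slow scan above --
         stop iterating as soon as every known-present entry has been visited, instead of always
         walking the whole table. The boolean array is a stand-in for the present bits. */
/** Clears all set entries, stopping early once @a cPresent of them have been seen. */
static void sketchClearPresent(bool *pafPresent, unsigned cEntries, unsigned cPresent)
{
    for (unsigned i = 0; i < cEntries && cPresent > 0; i++)
        if (pafPresent[i])
        {
            pafPresent[i] = false;
            cPresent--;
        }
}
#endif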
3863
3864
3865/**
3866 * Clears the user entry in a user table.
3867 *
3868 * This is used to remove all references to a page when flushing it.
3869 */
3870static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3871{
3872 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3873 Assert(pUser->iUser < pPool->cCurPages);
3874 uint32_t iUserTable = pUser->iUserTable;
3875
3876 /*
3877 * Map the user page. Ignore references made by fictitious pages.
3878 */
3879 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3880 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3881 union
3882 {
3883 uint64_t *pau64;
3884 uint32_t *pau32;
3885 } u;
3886 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3887 {
3888 Assert(!pUserPage->pvPageR3);
3889 return;
3890 }
3891 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3892
3893
3894 /* Safety precaution in case we change the paging for other modes too in the future. */
3895 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3896
3897#ifdef VBOX_STRICT
3898 /*
3899 * Some sanity checks.
3900 */
3901 switch (pUserPage->enmKind)
3902 {
3903 case PGMPOOLKIND_32BIT_PD:
3904 case PGMPOOLKIND_32BIT_PD_PHYS:
3905 Assert(iUserTable < X86_PG_ENTRIES);
3906 break;
3907 case PGMPOOLKIND_PAE_PDPT:
3908 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3909 case PGMPOOLKIND_PAE_PDPT_PHYS:
3910 Assert(iUserTable < 4);
3911 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3912 break;
3913 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3914 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3915 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3916 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3917 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3918 case PGMPOOLKIND_PAE_PD_PHYS:
3919 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3920 break;
3921 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3922 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3923 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3924 break;
3925 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3926 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3927 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3928 break;
3929 case PGMPOOLKIND_64BIT_PML4:
3930 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3931 /* GCPhys >> PAGE_SHIFT is the index here */
3932 break;
3933 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3934 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3935 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3936 break;
3937
3938 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3939 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3940 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3941 break;
3942
3943 case PGMPOOLKIND_ROOT_NESTED:
3944 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3945 break;
3946
3947 default:
3948 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3949 break;
3950 }
3951#endif /* VBOX_STRICT */
3952
3953 /*
3954 * Clear the entry in the user page.
3955 */
3956 switch (pUserPage->enmKind)
3957 {
3958 /* 32-bit entries */
3959 case PGMPOOLKIND_32BIT_PD:
3960 case PGMPOOLKIND_32BIT_PD_PHYS:
3961 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3962 break;
3963
3964 /* 64-bit entries */
3965 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3966 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3967 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3968 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3969 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3970#ifdef IN_RC
3971 /*
3972 * In 32-bit PAE mode we *must* invalidate the TLB when changing a
3973 * PDPT entry; the CPU fetches them only during cr3 load, so any
3974 * non-present PDPT will continue to cause page faults.
3975 */
3976 ASMReloadCR3();
3977#endif
3978 /* fall thru */
3979 case PGMPOOLKIND_PAE_PD_PHYS:
3980 case PGMPOOLKIND_PAE_PDPT_PHYS:
3981 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3982 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3983 case PGMPOOLKIND_64BIT_PML4:
3984 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3985 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3986 case PGMPOOLKIND_PAE_PDPT:
3987 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3988 case PGMPOOLKIND_ROOT_NESTED:
3989 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3990 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3991 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3992 break;
3993
3994 default:
3995 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3996 }
3997 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3998}
3999
4000
4001/**
4002 * Clears all users of a page.
4003 */
4004static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4005{
4006 /*
4007 * Free all the user records.
4008 */
4009 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
4010
4011 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4012 uint16_t i = pPage->iUserHead;
4013 while (i != NIL_PGMPOOL_USER_INDEX)
4014 {
4015 /* Clear the entry in the user table. */
4016 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
4017
4018 /* Free it. */
4019 const uint16_t iNext = paUsers[i].iNext;
4020 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4021 paUsers[i].iNext = pPool->iUserFreeHead;
4022 pPool->iUserFreeHead = i;
4023
4024 /* Next. */
4025 i = iNext;
4026 }
4027 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4028}
4029
4030
4031/**
4032 * Allocates a new physical cross reference extent.
4033 *
4034 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
4035 * @param pVM The cross context VM structure.
4036 * @param piPhysExt Where to store the phys ext index.
4037 */
4038PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
4039{
4040 PGM_LOCK_ASSERT_OWNER(pVM);
4041 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4042 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4043 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4044 {
4045 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4046 return NULL;
4047 }
4048 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4049 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4050 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4051 *piPhysExt = iPhysExt;
4052 return pPhysExt;
4053}
4054
4055
4056/**
4057 * Frees a physical cross reference extent.
4058 *
4059 * @param pVM The cross context VM structure.
4060 * @param iPhysExt The extent to free.
4061 */
4062void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
4063{
4064 PGM_LOCK_ASSERT_OWNER(pVM);
4065 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4066 Assert(iPhysExt < pPool->cMaxPhysExts);
4067 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4068 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4069 {
4070 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4071 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4072 }
4073 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4074 pPool->iPhysExtFreeHead = iPhysExt;
4075}
4076
4077
4078/**
4079 * Frees a chain of physical cross reference extents.
4080 *
4081 * @param pVM The cross context VM structure.
4082 * @param iPhysExt The index of the head of the extent chain to free.
4083 */
4084void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
4085{
4086 PGM_LOCK_ASSERT_OWNER(pVM);
4087 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4088
4089 const uint16_t iPhysExtStart = iPhysExt;
4090 PPGMPOOLPHYSEXT pPhysExt;
4091 do
4092 {
4093 Assert(iPhysExt < pPool->cMaxPhysExts);
4094 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4095 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4096 {
4097 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4098 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4099 }
4100
4101 /* next */
4102 iPhysExt = pPhysExt->iNext;
4103 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4104
4105 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4106 pPool->iPhysExtFreeHead = iPhysExtStart;
4107}
4108
4109
4110/**
4111 * Insert a reference into a list of physical cross reference extents.
4112 *
4113 * @returns The new tracking data for PGMPAGE.
4114 *
4115 * @param pVM The cross context VM structure.
4116 * @param iPhysExt The physical extent index of the list head.
4117 * @param iShwPT The shadow page table index.
4118 * @param iPte Page table entry
4119 *
4120 */
4121static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4122{
4123 PGM_LOCK_ASSERT_OWNER(pVM);
4124 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4125 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4126
4127 /*
4128 * Special common cases.
4129 */
4130 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4131 {
4132 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4133 paPhysExts[iPhysExt].apte[1] = iPte;
4134 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4135 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4136 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4137 }
4138 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4139 {
4140 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4141 paPhysExts[iPhysExt].apte[2] = iPte;
4142 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4143 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4144 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4145 }
4146 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4147
4148 /*
4149 * General treatment.
4150 */
4151 const uint16_t iPhysExtStart = iPhysExt;
4152 unsigned cMax = 15;
4153 for (;;)
4154 {
4155 Assert(iPhysExt < pPool->cMaxPhysExts);
4156 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4157 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4158 {
4159 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4160 paPhysExts[iPhysExt].apte[i] = iPte;
4161 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4162 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4163 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4164 }
4165 if (!--cMax)
4166 {
4167 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4168 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4169 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4170 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4171 }
4172
4173 /* advance */
4174 iPhysExt = paPhysExts[iPhysExt].iNext;
4175 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4176 break;
4177 }
4178
4179 /*
4180 * Add another extent to the list.
4181 */
4182 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4183 if (!pNew)
4184 {
4185 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4186 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4187 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4188 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4189 }
4190 pNew->iNext = iPhysExtStart;
4191 pNew->aidx[0] = iShwPT;
4192 pNew->apte[0] = iPte;
4193 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4194 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4195}
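
#if 0 /* Illustrative sketch (not built): searching chained, fixed-size extents for a free slot,
         as pgmPoolTrackPhysExtInsert does above. The 3-slot extent and UINT16_MAX NIL value are
         simplified stand-ins for PGMPOOLPHYSEXT and NIL_PGMPOOL_PHYSEXT_INDEX. */
typedef struct SKETCHEXT
{
    uint16_t aidx[3];   /**< Slot values; UINT16_MAX means the slot is free. */
    uint16_t iNext;     /**< Next extent in the chain, or UINT16_MAX. */
} SKETCHEXT;

/** Stores @a uValue in the first free slot of the chain starting at @a iHead.
 *  Returns true on success, false when every slot in the chain is taken. */
static bool sketchExtInsert(SKETCHEXT *paExts, uint16_t iHead, uint16_t uValue)
{
    for (uint16_t i = iHead; i != UINT16_MAX; i = paExts[i].iNext)
        for (unsigned iSlot = 0; iSlot < 3; iSlot++)
            if (paExts[i].aidx[iSlot] == UINT16_MAX)
            {
                paExts[i].aidx[iSlot] = uValue;
                return true;
            }
    return false; /* the caller would allocate a new extent or fall back to the overflow marker */
}
#endif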
4196
4197
4198/**
4199 * Add a reference to a guest physical page where extents are in use.
4200 *
4201 * @returns The new tracking data for PGMPAGE.
4202 *
4203 * @param pVM The cross context VM structure.
4204 * @param pPhysPage Pointer to the aPages entry in the ram range.
4205 * @param u16 The ram range flags (top 16-bits).
4206 * @param iShwPT The shadow page table index.
4207 * @param iPte Page table entry
4208 */
4209uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4210{
4211 pgmLock(pVM);
4212 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4213 {
4214 /*
4215 * Convert to extent list.
4216 */
4217 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4218 uint16_t iPhysExt;
4219 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4220 if (pPhysExt)
4221 {
4222 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4223 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4224 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4225 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4226 pPhysExt->aidx[1] = iShwPT;
4227 pPhysExt->apte[1] = iPte;
4228 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4229 }
4230 else
4231 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4232 }
4233 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4234 {
4235 /*
4236 * Insert into the extent list.
4237 */
4238 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4239 }
4240 else
4241 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4242 pgmUnlock(pVM);
4243 return u16;
4244}
4245
4246
4247/**
4248 * Clear references to guest physical memory.
4249 *
4250 * @param pPool The pool.
4251 * @param pPage The page.
4252 * @param pPhysPage Pointer to the aPages entry in the ram range.
4253 * @param iPte Shadow PTE index
4254 */
4255void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4256{
4257 PVM pVM = pPool->CTX_SUFF(pVM);
4258 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4259 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4260
4261 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4262 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4263 {
4264 pgmLock(pVM);
4265
4266 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4267 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4268 do
4269 {
4270 Assert(iPhysExt < pPool->cMaxPhysExts);
4271
4272 /*
4273 * Look for the shadow page and check if it's all freed.
4274 */
4275 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4276 {
4277 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4278 && paPhysExts[iPhysExt].apte[i] == iPte)
4279 {
4280 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4281 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4282
4283 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4284 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4285 {
4286 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4287 pgmUnlock(pVM);
4288 return;
4289 }
4290
4291 /* we can free the node. */
4292 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4293 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4294 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4295 {
4296 /* lonely node */
4297 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4298 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4299 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4300 }
4301 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4302 {
4303 /* head */
4304 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4305 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4306 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4307 }
4308 else
4309 {
4310 /* in list */
4311 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4312 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4313 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4314 }
4315 iPhysExt = iPhysExtNext;
4316 pgmUnlock(pVM);
4317 return;
4318 }
4319 }
4320
4321 /* next */
4322 iPhysExtPrev = iPhysExt;
4323 iPhysExt = paPhysExts[iPhysExt].iNext;
4324 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4325
4326 pgmUnlock(pVM);
4327 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4328 }
4329 else /* nothing to do */
4330 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4331}
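
#if 0 /* Illustrative sketch (not built): the three unlink cases handled above when freeing an
         extent from a singly linked, index-based chain (lonely node, head node, interior node).
         UINT16_MAX stands in for NIL_PGMPOOL_PHYSEXT_INDEX; the node type is made up. */
typedef struct SKETCHNODE
{
    uint16_t iNext;     /**< Next node in the chain, or UINT16_MAX. */
} SKETCHNODE;

/** Unlinks node @a i (whose predecessor is @a iPrev, or UINT16_MAX if @a i is the head)
 *  and returns the new head of the chain. */
static uint16_t sketchUnlinkNode(SKETCHNODE *paNodes, uint16_t iHead, uint16_t i, uint16_t iPrev)
{
    uint16_t const iNext = paNodes[i].iNext;
    if (iPrev == UINT16_MAX)
        iHead = iNext;                  /* lonely or head node: the successor (or NIL) becomes the head */
    else
        paNodes[iPrev].iNext = iNext;   /* interior node: make the predecessor bypass it */
    paNodes[i].iNext = UINT16_MAX;      /* the node can now be pushed onto a free list */
    return iHead;
}
#endif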
4332
4333/**
4334 * Clear references to guest physical memory.
4335 *
4336 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4337 * physical address is assumed to be correct, so the linear search can be
4338 * skipped and we can assert at an earlier point.
4339 *
4340 * @param pPool The pool.
4341 * @param pPage The page.
4342 * @param HCPhys The host physical address corresponding to the guest page.
4343 * @param GCPhys The guest physical address corresponding to HCPhys.
4344 * @param iPte Shadow PTE index
4345 */
4346static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4347{
4348 /*
4349 * Lookup the page and check if it checks out before derefing it.
4350 */
4351 PVM pVM = pPool->CTX_SUFF(pVM);
4352 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4353 if (pPhysPage)
4354 {
4355 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4356#ifdef LOG_ENABLED
4357 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4358 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4359#endif
4360 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4361 {
4362 Assert(pPage->cPresent);
4363 Assert(pPool->cPresent);
4364 pPage->cPresent--;
4365 pPool->cPresent--;
4366 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4367 return;
4368 }
4369
4370 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4371 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4372 }
4373 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4374}
4375
4376
4377/**
4378 * Clear references to guest physical memory.
4379 *
4380 * @param pPool The pool.
4381 * @param pPage The page.
4382 * @param HCPhys The host physical address corresponding to the guest page.
4383 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4384 * @param iPte Shadow pte index
4385 */
4386void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4387{
4388 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4389
4390 /*
4391 * Try the hint first.
4392 */
4393 RTHCPHYS HCPhysHinted;
4394 PVM pVM = pPool->CTX_SUFF(pVM);
4395 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4396 if (pPhysPage)
4397 {
4398 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4399 Assert(HCPhysHinted);
4400 if (HCPhysHinted == HCPhys)
4401 {
4402 Assert(pPage->cPresent);
4403 Assert(pPool->cPresent);
4404 pPage->cPresent--;
4405 pPool->cPresent--;
4406 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4407 return;
4408 }
4409 }
4410 else
4411 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4412
4413 /*
4414 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4415 */
4416 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4417 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4418 while (pRam)
4419 {
4420 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4421 while (iPage-- > 0)
4422 {
4423 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4424 {
4425 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4426 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4427 Assert(pPage->cPresent);
4428 Assert(pPool->cPresent);
4429 pPage->cPresent--;
4430 pPool->cPresent--;
4431 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4432 return;
4433 }
4434 }
4435 pRam = pRam->CTX_SUFF(pNext);
4436 }
4437
4438 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4439}
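
#if 0 /* Illustrative sketch (not built): the hint-first lookup pattern used above -- try the
         cheap candidate first and only fall back to a linear scan when it does not match.
         The flat array and equality test are simplified stand-ins for the RAM range walk. */
/** Returns the index of the entry matching @a uKey, trying @a iHint first; UINT32_MAX if none. */
static uint32_t sketchFindWithHint(uint64_t const *paEntries, uint32_t cEntries, uint32_t iHint, uint64_t uKey)
{
    if (iHint < cEntries && paEntries[iHint] == uKey)
        return iHint;                               /* the common, cheap case */
    for (uint32_t i = 0; i < cEntries; i++)         /* expensive fallback, like the RAM range scan */
        if (paEntries[i] == uKey)
            return i;
    return UINT32_MAX;
}
#endif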
4440
4441
4442/**
4443 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4444 *
4445 * @param pPool The pool.
4446 * @param pPage The page.
4447 * @param pShwPT The shadow page table (mapping of the page).
4448 * @param pGstPT The guest page table.
4449 */
4450DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4451{
4452 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4453 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4454 {
4455 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4456 if (pShwPT->a[i].n.u1Present)
4457 {
4458 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4459 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4460 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4461 if (!pPage->cPresent)
4462 break;
4463 }
4464 }
4465}
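
#if 0 /* Illustrative sketch (not built): the A20 gate masking applied to the guest physical
         hints above -- when the A20 line is disabled, bit 20 of the guest physical address is
         forced to zero so addresses wrap the way real hardware would. */
/** Returns the guest physical address with the A20 gate applied. */
static uint64_t sketchApplyA20(uint64_t GCPhys, bool fA20Enabled)
{
    return fA20Enabled ? GCPhys : GCPhys & ~(UINT64_C(1) << 20);
}
#endif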
4466
4467
4468/**
4469 * Clear references to guest physical memory in a PAE / 32-bit page table.
4470 *
4471 * @param pPool The pool.
4472 * @param pPage The page.
4473 * @param pShwPT The shadow page table (mapping of the page).
4474 * @param pGstPT The guest page table (just a half one).
4475 */
4476DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4477{
4478 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4479 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4480 {
4481 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4482 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4483 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4484 {
4485 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4486 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4487 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4488 if (!pPage->cPresent)
4489 break;
4490 }
4491 }
4492}
4493
4494
4495/**
4496 * Clear references to guest physical memory in a PAE / PAE page table.
4497 *
4498 * @param pPool The pool.
4499 * @param pPage The page.
4500 * @param pShwPT The shadow page table (mapping of the page).
4501 * @param pGstPT The guest page table.
4502 */
4503DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4504{
4505 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4506 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4507 {
4508 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4509 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4510 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4511 {
4512 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4513 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4514 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4515 if (!pPage->cPresent)
4516 break;
4517 }
4518 }
4519}
4520
4521
4522/**
4523 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4524 *
4525 * @param pPool The pool.
4526 * @param pPage The page.
4527 * @param pShwPT The shadow page table (mapping of the page).
4528 */
4529DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4530{
4531 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4532 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4533 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4534 {
4535 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4536 if (pShwPT->a[i].n.u1Present)
4537 {
4538 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4539 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4540 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4541 if (!pPage->cPresent)
4542 break;
4543 }
4544 }
4545}
4546
4547
4548/**
4549 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4550 *
4551 * @param pPool The pool.
4552 * @param pPage The page.
4553 * @param pShwPT The shadow page table (mapping of the page).
4554 */
4555DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4556{
4557 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4558 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4559 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4560 {
4561 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4562 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4563 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4564 {
4565 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4566 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4567 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4568 if (!pPage->cPresent)
4569 break;
4570 }
4571 }
4572}
4573
4574
4575/**
4576 * Clear references to shadowed pages in an EPT page table.
4577 *
4578 * @param pPool The pool.
4579 * @param pPage The page.
4580 * @param pShwPT The shadow page table (mapping of the
4581 * page).
4582 */
4583DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4584{
4585 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4586 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4587 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4588 {
4589 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4590 if (pShwPT->a[i].n.u1Present)
4591 {
4592 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4593 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4594 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4595 if (!pPage->cPresent)
4596 break;
4597 }
4598 }
4599}
4600
4601
4602/**
4603 * Clear references to shadowed pages in a 32-bit page directory.
4604 *
4605 * @param pPool The pool.
4606 * @param pPage The page.
4607 * @param pShwPD The shadow page directory (mapping of the page).
4608 */
4609DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4610{
4611 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4612 {
4613 if ( pShwPD->a[i].n.u1Present
4614 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4615 )
4616 {
4617 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4618 if (pSubPage)
4619 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4620 else
4621 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4622 }
4623 }
4624}
4625
4626
4627/**
4628 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4629 *
4630 * @param pPool The pool.
4631 * @param pPage The page.
4632 * @param pShwPD The shadow page directory (mapping of the page).
4633 */
4634DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4635{
4636 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4637 {
4638 if ( pShwPD->a[i].n.u1Present
4639 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4640 {
4641#ifdef PGM_WITH_LARGE_PAGES
4642 if (pShwPD->a[i].b.u1Size)
4643 {
4644 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4645 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4646 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4647 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4648 i);
4649 }
4650 else
4651#endif
4652 {
4653 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4654 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4655 if (pSubPage)
4656 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4657 else
4658 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4659 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4660 }
4661 }
4662 }
4663}
4664
4665
4666/**
4667 * Clear references to shadowed pages in a PAE page directory pointer table.
4668 *
4669 * @param pPool The pool.
4670 * @param pPage The page.
4671 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4672 */
4673DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4674{
4675 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4676 {
4677 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4678 if ( pShwPDPT->a[i].n.u1Present
4679 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4680 )
4681 {
4682 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4683 if (pSubPage)
4684 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4685 else
4686 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4687 }
4688 }
4689}
4690
4691
4692/**
4693 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4694 *
4695 * @param pPool The pool.
4696 * @param pPage The page.
4697 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4698 */
4699DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4700{
4701 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4702 {
4703 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4704 if (pShwPDPT->a[i].n.u1Present)
4705 {
4706 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4707 if (pSubPage)
4708 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4709 else
4710 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4711 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4712 }
4713 }
4714}
4715
4716
4717/**
4718 * Clear references to shadowed pages in a 64-bit level 4 page table.
4719 *
4720 * @param pPool The pool.
4721 * @param pPage The page.
4722 * @param pShwPML4 The shadow PML4 (mapping of the page).
4723 */
4724DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4725{
4726 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4727 {
4728 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4729 if (pShwPML4->a[i].n.u1Present)
4730 {
4731 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4732 if (pSubPage)
4733 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4734 else
4735 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4736 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4737 }
4738 }
4739}
4740
4741
4742/**
4743 * Clear references to shadowed pages in an EPT page directory.
4744 *
4745 * @param pPool The pool.
4746 * @param pPage The page.
4747 * @param pShwPD The shadow page directory (mapping of the page).
4748 */
4749DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4750{
4751 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4752 {
4753 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4754 if (pShwPD->a[i].n.u1Present)
4755 {
4756#ifdef PGM_WITH_LARGE_PAGES
4757 if (pShwPD->a[i].b.u1Size)
4758 {
4759 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4760 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4761 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4762 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4763 i);
4764 }
4765 else
4766#endif
4767 {
4768 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4769 if (pSubPage)
4770 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4771 else
4772 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4773 }
4774 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4775 }
4776 }
4777}
4778
4779
4780/**
4781 * Clear references to shadowed pages in an EPT page directory pointer table.
4782 *
4783 * @param pPool The pool.
4784 * @param pPage The page.
4785 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4786 */
4787DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4788{
4789 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4790 {
4791 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4792 if (pShwPDPT->a[i].n.u1Present)
4793 {
4794 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4795 if (pSubPage)
4796 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4797 else
4798 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4799 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4800 }
4801 }
4802}
4803
4804
4805/**
4806 * Clears all references made by this page.
4807 *
4808 * This includes other shadow pages and GC physical addresses.
4809 *
4810 * @param pPool The pool.
4811 * @param pPage The page.
4812 */
4813static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4814{
4815 /*
4816 * Map the shadow page and take action according to the page kind.
4817 */
4818 PVM pVM = pPool->CTX_SUFF(pVM);
4819 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4820 switch (pPage->enmKind)
4821 {
4822 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4823 {
4824 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4825 void *pvGst;
4826 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4827 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4828 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4829 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4830 break;
4831 }
4832
4833 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4834 {
4835 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4836 void *pvGst;
4837 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4838 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4839 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4840 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4841 break;
4842 }
4843
4844 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4845 {
4846 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4847 void *pvGst;
4848 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4849 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4850 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4851 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4852 break;
4853 }
4854
4855 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4856 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4857 {
4858 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4859 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4860 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4861 break;
4862 }
4863
4864 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4865 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4866 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4867 {
4868 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4869 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4870 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4871 break;
4872 }
4873
4874 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4875 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4876 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4877 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4878 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4879 case PGMPOOLKIND_PAE_PD_PHYS:
4880 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4881 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4882 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4883 break;
4884
4885 case PGMPOOLKIND_32BIT_PD_PHYS:
4886 case PGMPOOLKIND_32BIT_PD:
4887 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4888 break;
4889
4890 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4891 case PGMPOOLKIND_PAE_PDPT:
4892 case PGMPOOLKIND_PAE_PDPT_PHYS:
4893 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4894 break;
4895
4896 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4897 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4898 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4899 break;
4900
4901 case PGMPOOLKIND_64BIT_PML4:
4902 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4903 break;
4904
4905 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4906 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4907 break;
4908
4909 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4910 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4911 break;
4912
4913 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4914 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4915 break;
4916
4917 default:
4918 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4919 }
4920
4921 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4922 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4923 ASMMemZeroPage(pvShw);
4924 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4925 pPage->fZeroed = true;
4926 Assert(!pPage->cPresent);
4927 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4928}
4929
4930
4931/**
4932 * Flushes a pool page.
4933 *
4934 * This moves the page to the free list after removing all user references to it.
4935 *
4936 * @returns VBox status code.
4937 * @retval VINF_SUCCESS on success.
4938 * @param pPool The pool.
4939 * @param pPage The shadow page.
4940 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4941 */
4942int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4943{
4944 PVM pVM = pPool->CTX_SUFF(pVM);
4945 bool fFlushRequired = false;
4946
4947 int rc = VINF_SUCCESS;
4948 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4949 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4950 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4951
4952 /*
4953 * Reject any attempts at flushing any of the special root pages (shall
4954 * not happen).
4955 */
4956 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4957 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4958 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4959 VINF_SUCCESS);
4960
4961 pgmLock(pVM);
4962
4963 /*
4964 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4965 */
4966 if (pgmPoolIsPageLocked(pPage))
4967 {
4968 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4969 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4970 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4971 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4972 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4973 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4974 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4975 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4976 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4977 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4978 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4979 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4980 pgmUnlock(pVM);
4981 return VINF_SUCCESS;
4982 }
4983
4984#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4985 /* Start a subset so we won't run out of mapping space. */
4986 PVMCPU pVCpu = VMMGetCpu(pVM);
4987 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4988#endif
4989
4990 /*
4991 * Mark the page as being in need of an ASMMemZeroPage().
4992 */
4993 pPage->fZeroed = false;
4994
4995#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4996 if (pPage->fDirty)
4997 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4998#endif
4999
5000 /* If there are any users of this table, then we *must* issue a TLB flush on all VCPUs. */
5001 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
5002 fFlushRequired = true;
5003
5004 /*
5005 * Clear the page.
5006 */
5007 pgmPoolTrackClearPageUsers(pPool, pPage);
5008 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
5009 pgmPoolTrackDeref(pPool, pPage);
5010 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
5011
5012 /*
5013 * Flush it from the cache.
5014 */
5015 pgmPoolCacheFlushPage(pPool, pPage);
5016
5017#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
5018 /* Heavy stuff done. */
5019 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
5020#endif
5021
5022 /*
5023 * Deregister the monitoring.
5024 */
5025 if (pPage->fMonitored)
5026 rc = pgmPoolMonitorFlush(pPool, pPage);
5027
5028 /*
5029 * Free the page.
5030 */
5031 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
5032 pPage->iNext = pPool->iFreeHead;
5033 pPool->iFreeHead = pPage->idx;
5034 pPage->enmKind = PGMPOOLKIND_FREE;
5035 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5036 pPage->GCPhys = NIL_RTGCPHYS;
5037 pPage->fReusedFlushPending = false;
5038
5039 pPool->cUsedPages--;
5040
5041 /* Flush the TLBs of all VCPUs if required. */
5042 if ( fFlushRequired
5043 && fFlush)
5044 {
5045 PGM_INVL_ALL_VCPU_TLBS(pVM);
5046 }
5047
5048 pgmUnlock(pVM);
5049 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5050 return rc;
5051}
5052
5053
5054/**
5055 * Frees a usage of a pool page.
5056 *
5057 * The caller is responsible for updating the user table so that it no longer
5058 * references the shadow page.
5059 *
5060 * @param pPool The pool.
5061 * @param pPage The shadow page.
5062 * @param iUser The shadow page pool index of the user table.
5063 * NIL_PGMPOOL_IDX for root pages.
5064 * @param iUserTable The index into the user table (shadowed). Ignored if
5065 * root page.
5066 */
5067void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5068{
5069 PVM pVM = pPool->CTX_SUFF(pVM);
5070
5071 STAM_PROFILE_START(&pPool->StatFree, a);
5072 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5073 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5074 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5075
5076 pgmLock(pVM);
5077 if (iUser != NIL_PGMPOOL_IDX)
5078 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5079 if (!pPage->fCached)
5080 pgmPoolFlushPage(pPool, pPage);
5081 pgmUnlock(pVM);
5082 STAM_PROFILE_STOP(&pPool->StatFree, a);
5083}
5084
5085
5086/**
5087 * Makes one or more free pages available, either by growing the pool or by freeing a cached page.
5088 *
5089 * @returns VBox status code.
5090 * @retval VINF_SUCCESS on success.
5091 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5092 *
5093 * @param pPool The pool.
5094 * @param enmKind The page table kind.
5095 * @param iUser The user of the page.
5096 */
5097static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5098{
5099 PVM pVM = pPool->CTX_SUFF(pVM);
5100 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5101 NOREF(enmKind);
5102
5103 /*
5104 * If the pool isn't full grown yet, expand it.
5105 */
5106 if ( pPool->cCurPages < pPool->cMaxPages
5107#if defined(IN_RC)
5108 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
5109 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5110 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
5111#endif
5112 )
5113 {
5114 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
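 /* Growing the pool is a ring-3 only operation; from R0/RC we request it via a ring-3 call-out. */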
5115#ifdef IN_RING3
5116 int rc = PGMR3PoolGrow(pVM);
5117#else
5118 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
5119#endif
5120 if (RT_FAILURE(rc))
5121 return rc;
5122 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5123 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5124 return VINF_SUCCESS;
5125 }
5126
5127 /*
5128 * Free one cached page.
5129 */
5130 return pgmPoolCacheFreeOne(pPool, iUser);
5131}
5132
5133
5134/**
5135 * Allocates a page from the pool.
5136 *
5137 * This page may actually be a cached page and not in need of any processing
5138 * on the caller's part.
5139 *
5140 * @returns VBox status code.
5141 * @retval VINF_SUCCESS if a NEW page was allocated.
5142 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5143 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5144 *
5145 * @param pVM The cross context VM structure.
5146 * @param GCPhys The GC physical address of the page we're going to shadow.
5147 * For 4MB and 2MB PD entries, it's the first address the
5148 * shadow PT is covering.
5149 * @param enmKind The kind of mapping.
5150 * @param enmAccess Access type for the mapping (only relevant for big pages).
5151 * @param fA20Enabled Whether the A20 gate is enabled or not.
5152 * @param iUser The shadow page pool index of the user table. Root
5153 * pages should pass NIL_PGMPOOL_IDX.
5154 * @param iUserTable The index into the user table (shadowed). Ignored for
5155 * root pages (iUser == NIL_PGMPOOL_IDX).
5156 * @param fLockPage Whether to lock the page.
5157 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
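 *
 * @remarks A purely illustrative sketch of a call site; GCPhysPt, idxShwPde and
 *          iPdEntry are hypothetical names, and the kind/access values depend on
 *          the caller's shadow paging mode:
 * @code
 *          PPGMPOOLPAGE pShwPage;
 *          int rc = pgmPoolAlloc(pVM, GCPhysPt, PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT, PGMPOOLACCESS_DONTCARE,
 *                                true, idxShwPde, iPdEntry, false, &pShwPage);
 * @endcode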
5158 */
5159int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5160 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5161{
5162 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5163 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5164 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5165 *ppPage = NULL;
5166 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5167 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5168 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5169
5170 pgmLock(pVM);
5171
5172 if (pPool->fCacheEnabled)
5173 {
5174 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5175 if (RT_SUCCESS(rc2))
5176 {
5177 if (fLockPage)
5178 pgmPoolLockPage(pPool, *ppPage);
5179 pgmUnlock(pVM);
5180 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5181 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5182 return rc2;
5183 }
5184 }
5185
5186 /*
5187 * Allocate a new one.
5188 */
5189 int rc = VINF_SUCCESS;
5190 uint16_t iNew = pPool->iFreeHead;
5191 if (iNew == NIL_PGMPOOL_IDX)
5192 {
5193 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5194 if (RT_FAILURE(rc))
5195 {
5196 pgmUnlock(pVM);
5197 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5198 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5199 return rc;
5200 }
5201 iNew = pPool->iFreeHead;
5202 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5203 }
5204
5205 /* unlink the free head */
5206 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5207 pPool->iFreeHead = pPage->iNext;
5208 pPage->iNext = NIL_PGMPOOL_IDX;
5209
5210 /*
5211 * Initialize it.
5212 */
5213 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5214 pPage->enmKind = enmKind;
5215 pPage->enmAccess = enmAccess;
5216 pPage->GCPhys = GCPhys;
5217 pPage->fA20Enabled = fA20Enabled;
5218 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5219 pPage->fMonitored = false;
5220 pPage->fCached = false;
5221 pPage->fDirty = false;
5222 pPage->fReusedFlushPending = false;
5223 pPage->cModifications = 0;
5224 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5225 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5226 pPage->cPresent = 0;
5227 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5228 pPage->idxDirtyEntry = 0;
5229 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5230 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5231 pPage->cLastAccessHandler = 0;
5232 pPage->cLocked = 0;
5233# ifdef VBOX_STRICT
5234 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5235# endif
5236
5237 /*
5238 * Insert into the tracking and cache. If this fails, free the page.
5239 */
5240 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5241 if (RT_FAILURE(rc3))
5242 {
5243 pPool->cUsedPages--;
5244 pPage->enmKind = PGMPOOLKIND_FREE;
5245 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5246 pPage->GCPhys = NIL_RTGCPHYS;
5247 pPage->iNext = pPool->iFreeHead;
5248 pPool->iFreeHead = pPage->idx;
5249 pgmUnlock(pVM);
5250 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5251 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5252 return rc3;
5253 }
5254
5255 /*
5256 * Commit the allocation, clear the page and return.
5257 */
5258#ifdef VBOX_WITH_STATISTICS
5259 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5260 pPool->cUsedPagesHigh = pPool->cUsedPages;
5261#endif
5262
5263 if (!pPage->fZeroed)
5264 {
5265 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5266 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5267 ASMMemZeroPage(pv);
5268 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5269 }
5270
5271 *ppPage = pPage;
5272 if (fLockPage)
5273 pgmPoolLockPage(pPool, pPage);
5274 pgmUnlock(pVM);
5275 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5276 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5277 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5278 return rc;
5279}
5280
5281
5282/**
5283 * Frees a usage of a pool page.
5284 *
5285 * @param pVM The cross context VM structure.
5286 * @param HCPhys The HC physical address of the shadow page.
5287 * @param iUser The shadow page pool index of the user table.
5288 * NIL_PGMPOOL_IDX if root page.
5289 * @param iUserTable The index into the user table (shadowed). Ignored if
5290 * root page.
5291 */
5292void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5293{
5294 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5295 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5296 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5297}
5298
5299
5300/**
5301 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5302 *
5303 * @returns Pointer to the shadow page structure.
5304 * @param pPool The pool.
5305 * @param HCPhys The HC physical address of the shadow page.
5306 */
5307PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5308{
5309 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5310
5311 /*
5312 * Look up the page.
5313 */
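 /* The AVL tree is keyed on the page-aligned host physical address, so mask off the low attribute/offset bits. */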
5314 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5315
5316 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5317 return pPage;
5318}
5319
5320
5321/**
5322 * Internal worker for finding a page for debugging purposes, no assertions.
5323 *
5324 * @returns Pointer to the shadow page structure. NULL if not found.
5325 * @param pPool The pool.
5326 * @param HCPhys The HC physical address of the shadow page.
5327 */
5328PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5329{
5330 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5331 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5332}
5333
5334#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5335
5336/**
5337 * Flushes the specified page if present.
5338 *
5339 * @param pVM The cross context VM structure.
5340 * @param GCPhys Guest physical address of the page to flush.
5341 */
5342void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5343{
5344 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5345
5346 VM_ASSERT_EMT(pVM);
5347
5348 /*
5349 * Look up the GCPhys in the hash.
5350 */
5351 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5352 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5353 if (i == NIL_PGMPOOL_IDX)
5354 return;
5355
5356 do
5357 {
5358 PPGMPOOLPAGE pPage = &pPool->aPages[i];
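 /* Unsigned wrap-around check: true only when pPage->GCPhys falls within the page starting at GCPhys. */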
5359 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5360 {
5361 switch (pPage->enmKind)
5362 {
5363 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5364 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5365 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5366 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5367 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5368 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5369 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5370 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5371 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5372 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5373 case PGMPOOLKIND_64BIT_PML4:
5374 case PGMPOOLKIND_32BIT_PD:
5375 case PGMPOOLKIND_PAE_PDPT:
5376 {
5377 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5378#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5379 if (pPage->fDirty)
5380 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5381 else
5382#endif
5383 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5384 Assert(!pgmPoolIsPageLocked(pPage));
5385 pgmPoolMonitorChainFlush(pPool, pPage);
5386 return;
5387 }
5388
5389 /* ignore, no monitoring. */
5390 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5391 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5392 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5393 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5394 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5395 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5396 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5397 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5398 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5399 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5400 case PGMPOOLKIND_ROOT_NESTED:
5401 case PGMPOOLKIND_PAE_PD_PHYS:
5402 case PGMPOOLKIND_PAE_PDPT_PHYS:
5403 case PGMPOOLKIND_32BIT_PD_PHYS:
5404 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5405 break;
5406
5407 default:
5408 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5409 }
5410 }
5411
5412 /* next */
5413 i = pPage->iNext;
5414 } while (i != NIL_PGMPOOL_IDX);
5415 return;
5416}
5417
5418#endif /* IN_RING3 */
5419#ifdef IN_RING3
5420
5421/**
5422 * Reset CPU on hot plugging.
5423 *
5424 * @param pVM The cross context VM structure.
5425 * @param pVCpu The cross context virtual CPU structure.
5426 */
5427void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5428{
5429 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5430
5431 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5432 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5433 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5434}
5435
5436
5437/**
5438 * Flushes the entire cache.
5439 *
5440 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5441 * this and will execute the CR3 flush.
5442 *
5443 * @param pVM The cross context VM structure.
5444 */
5445void pgmR3PoolReset(PVM pVM)
5446{
5447 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5448
5449 PGM_LOCK_ASSERT_OWNER(pVM);
5450 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5451 LogFlow(("pgmR3PoolReset:\n"));
5452
5453 /*
5454 * If there are no pages in the pool, there is nothing to do.
5455 */
5456 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5457 {
5458 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5459 return;
5460 }
5461
5462 /*
5463 * Exit the shadow mode since we're going to clear everything,
5464 * including the root page.
5465 */
5466 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5467 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5468
5469 /*
5470 * Nuke the free list and reinsert all pages into it.
5471 */
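 /* Each page is linked to its successor (iNext = i + 1); the tail and the free list head are fixed up after the loop. */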
5472 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5473 {
5474 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5475
5476 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5477 if (pPage->fMonitored)
5478 pgmPoolMonitorFlush(pPool, pPage);
5479 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5480 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5481 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5482 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5483 pPage->GCPhys = NIL_RTGCPHYS;
5484 pPage->enmKind = PGMPOOLKIND_FREE;
5485 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5486 Assert(pPage->idx == i);
5487 pPage->iNext = i + 1;
5488 pPage->fA20Enabled = true;
5489 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5490 pPage->fSeenNonGlobal = false;
5491 pPage->fMonitored = false;
5492 pPage->fDirty = false;
5493 pPage->fCached = false;
5494 pPage->fReusedFlushPending = false;
5495 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5496 pPage->cPresent = 0;
5497 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5498 pPage->cModifications = 0;
5499 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5500 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5501 pPage->idxDirtyEntry = 0;
5502 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5503 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5504 pPage->cLastAccessHandler = 0;
5505 pPage->cLocked = 0;
5506#ifdef VBOX_STRICT
5507 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5508#endif
5509 }
5510 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5511 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5512 pPool->cUsedPages = 0;
5513
5514 /*
5515 * Zap and reinitialize the user records.
5516 */
5517 pPool->cPresent = 0;
5518 pPool->iUserFreeHead = 0;
5519 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5520 const unsigned cMaxUsers = pPool->cMaxUsers;
5521 for (unsigned i = 0; i < cMaxUsers; i++)
5522 {
5523 paUsers[i].iNext = i + 1;
5524 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5525 paUsers[i].iUserTable = 0xfffffffe;
5526 }
5527 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5528
5529 /*
5530 * Clear all the GCPhys links and rebuild the phys ext free list.
5531 */
5532 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5533 pRam;
5534 pRam = pRam->CTX_SUFF(pNext))
5535 {
5536 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5537 while (iPage-- > 0)
5538 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5539 }
5540
5541 pPool->iPhysExtFreeHead = 0;
5542 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5543 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
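 /* Each PGMPOOLPHYSEXT record holds three (pool page index, PTE index) slots; clear them all and chain the records into a free list. */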
5544 for (unsigned i = 0; i < cMaxPhysExts; i++)
5545 {
5546 paPhysExts[i].iNext = i + 1;
5547 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5548 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5549 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5550 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5551 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5552 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5553 }
5554 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5555
5556 /*
5557 * Just zap the modified list.
5558 */
5559 pPool->cModifiedPages = 0;
5560 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5561
5562 /*
5563 * Clear the GCPhys hash and the age list.
5564 */
5565 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5566 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5567 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5568 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5569
5570#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5571 /* Clear all dirty pages. */
5572 pPool->idxFreeDirtyPage = 0;
5573 pPool->cDirtyPages = 0;
5574 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5575 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5576#endif
5577
5578 /*
5579 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5580 */
5581 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5582 {
5583 /*
5584 * Re-enter the shadowing mode and assert Sync CR3 FF.
5585 */
5586 PVMCPU pVCpu = &pVM->aCpus[i];
5587 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5588 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5589 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5590 }
5591
5592 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5593}
5594
5595#endif /* IN_RING3 */
5596
5597#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5598/**
5599 * Stringifies a PGMPOOLKIND value.
5600 */
5601static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5602{
5603 switch ((PGMPOOLKIND)enmKind)
5604 {
5605 case PGMPOOLKIND_INVALID:
5606 return "PGMPOOLKIND_INVALID";
5607 case PGMPOOLKIND_FREE:
5608 return "PGMPOOLKIND_FREE";
5609 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5610 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5611 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5612 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5613 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5614 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5615 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5616 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5617 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5618 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5619 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5620 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5621 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5622 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5623 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5624 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5625 case PGMPOOLKIND_32BIT_PD:
5626 return "PGMPOOLKIND_32BIT_PD";
5627 case PGMPOOLKIND_32BIT_PD_PHYS:
5628 return "PGMPOOLKIND_32BIT_PD_PHYS";
5629 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5630 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5631 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5632 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5633 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5634 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5635 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5636 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5637 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5638 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5639 case PGMPOOLKIND_PAE_PD_PHYS:
5640 return "PGMPOOLKIND_PAE_PD_PHYS";
5641 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5642 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5643 case PGMPOOLKIND_PAE_PDPT:
5644 return "PGMPOOLKIND_PAE_PDPT";
5645 case PGMPOOLKIND_PAE_PDPT_PHYS:
5646 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5647 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5648 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5649 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5650 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5651 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5652 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5653 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5654 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5655 case PGMPOOLKIND_64BIT_PML4:
5656 return "PGMPOOLKIND_64BIT_PML4";
5657 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5658 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5659 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5660 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5661 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5662 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5663 case PGMPOOLKIND_ROOT_NESTED:
5664 return "PGMPOOLKIND_ROOT_NESTED";
5665 }
5666 return "Unknown kind!";
5667}
5668#endif /* LOG_ENABLED || VBOX_STRICT */
5669