VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 72129

Last change on this file since 72129 was 71586, checked in by vboxsync, 7 years ago

PGMAllPool: doc fix

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 216.5 KB
Line 
1/* $Id: PGMAllPool.cpp 71586 2018-03-31 12:50:59Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hm_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*********************************************************************************************************************************
44* Internal Functions *
45*********************************************************************************************************************************/
46RT_C_DECLS_BEGIN
47#if 0 /* unused */
48DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
49DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
50#endif /* unused */
51static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
54static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
55#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
56static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
57#endif
58#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
59static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
60#endif
61
62int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
63PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
64void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
65void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
66
67RT_C_DECLS_END
68
69
70#if 0 /* unused */
71/**
72 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
73 *
74 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
75 * @param enmKind The page kind.
76 */
77DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
78{
79 switch (enmKind)
80 {
81 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
83 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
84 return true;
85 default:
86 return false;
87 }
88}
89#endif /* unused */
90
91
92/**
93 * Flushes a chain of pages sharing the same access monitor.
94 *
95 * @returns VBox status code suitable for scheduling.
96 * @param pPool The pool.
97 * @param pPage A page in the chain.
98 * @todo VBOXSTRICTRC
99 */
100int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
101{
102 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
103
104 /*
105 * Find the list head.
106 */
107 uint16_t idx = pPage->idx;
108 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
109 {
110 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
111 {
112 idx = pPage->iMonitoredPrev;
113 Assert(idx != pPage->idx);
114 pPage = &pPool->aPages[idx];
115 }
116 }
117
118 /*
119 * Iterate the list flushing each shadow page.
120 */
121 int rc = VINF_SUCCESS;
122 for (;;)
123 {
124 idx = pPage->iMonitoredNext;
125 Assert(idx != pPage->idx);
126 if (pPage->idx >= PGMPOOL_IDX_FIRST)
127 {
128 int rc2 = pgmPoolFlushPage(pPool, pPage);
129 AssertRC(rc2);
130 }
131 /* next */
132 if (idx == NIL_PGMPOOL_IDX)
133 break;
134 pPage = &pPool->aPages[idx];
135 }
136 return rc;
137}
138
139
140/**
141 * Wrapper for reading the guest page table entry being modified, using the current context mapping of it when available.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pVM The cross context VM structure.
145 * @param pvDst Destination address
146 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
147 * on the context (e.g. \#PF in R0 & RC).
148 * @param GCPhysSrc The source guest physical address.
149 * @param cb Size of data to read
150 */
151DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
152{
153#if defined(IN_RING3)
154 NOREF(pVM); NOREF(GCPhysSrc);
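    /* Round the source pointer down to a cb-aligned boundary so the whole entry
       containing the write is read, not just the bytes at the fault address. */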
155 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
156 return VINF_SUCCESS;
157#else
158 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
159 NOREF(pvSrc);
160 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
161#endif
162}
163
164
165/**
166 * Process shadow entries before they are changed by the guest.
167 *
168 * For PT entries we will clear them. For PD entries, we'll simply check
169 * for mapping conflicts and set the SyncCR3 FF if found.
170 *
171 * @param pVCpu The cross context virtual CPU structure.
172 * @param pPool The pool.
173 * @param pPage The head page.
174 * @param GCPhysFault The guest physical fault address.
175 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
176 * depending on the context (e.g. \#PF in R0 & RC).
177 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
178 */
179static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
180 void const *pvAddress, unsigned cbWrite)
181{
182 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
183 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
184 PVM pVM = pPool->CTX_SUFF(pVM);
185 NOREF(pVCpu);
186
187 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
188 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
189
190 for (;;)
191 {
192 union
193 {
194 void *pv;
195 PX86PT pPT;
196 PPGMSHWPTPAE pPTPae;
197 PX86PD pPD;
198 PX86PDPAE pPDPae;
199 PX86PDPT pPDPT;
200 PX86PML4 pPML4;
201 } uShw;
202
203 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
204 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
205
206 uShw.pv = NULL;
207 switch (pPage->enmKind)
208 {
209 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
210 {
211 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
212 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
213 const unsigned iShw = off / sizeof(X86PTE);
214 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
215 if (uShw.pPT->a[iShw].n.u1Present)
216 {
217 X86PTE GstPte;
218
219 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
220 AssertRC(rc);
221 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
222 pgmPoolTracDerefGCPhysHint(pPool, pPage,
223 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
224 GstPte.u & X86_PTE_PG_MASK,
225 iShw);
226 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
227 }
228 break;
229 }
230
231 /* page/2 sized */
232 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
233 {
234 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
235 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
236 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
237 {
238 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
239 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
240 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
241 {
242 X86PTE GstPte;
243 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
244 AssertRC(rc);
245
246 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PG_MASK));
247 pgmPoolTracDerefGCPhysHint(pPool, pPage,
248 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
249 GstPte.u & X86_PTE_PG_MASK,
250 iShw);
251 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
252 }
253 }
254 break;
255 }
256
257 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
258 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
259 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
260 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
261 {
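                /* One 32-bit guest PD is shadowed by four PAE PDs: iShwPdpt selects which
                   quarter of the guest PD this shadow page covers, and each 4MB-capable
                   guest PDE maps to a pair of 2MB PAE PDEs, hence iShw is doubled. */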
262 unsigned iGst = off / sizeof(X86PDE);
263 unsigned iShwPdpt = iGst / 256;
264 unsigned iShw = (iGst % 256) * 2;
265 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
266
267 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
268 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
269 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
270 {
271 for (unsigned i = 0; i < 2; i++)
272 {
273# ifdef VBOX_WITH_RAW_MODE_NOT_R0
274 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
275 {
276 Assert(pgmMapAreMappingsEnabled(pVM));
277 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
278 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
279 break;
280 }
281# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
282 if (uShw.pPDPae->a[iShw+i].n.u1Present)
283 {
284 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
285 pgmPoolFree(pVM,
286 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
287 pPage->idx,
288 iShw + i);
289 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
290 }
291
292 /* paranoia / a bit assumptive. */
293 if ( (off & 3)
294 && (off & 3) + cbWrite > 4)
295 {
296 const unsigned iShw2 = iShw + 2 + i;
297 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
298 {
299# ifdef VBOX_WITH_RAW_MODE_NOT_R0
300 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
301 {
302 Assert(pgmMapAreMappingsEnabled(pVM));
303 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
304 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
305 break;
306 }
307# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
308 if (uShw.pPDPae->a[iShw2].n.u1Present)
309 {
310 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
311 pgmPoolFree(pVM,
312 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
313 pPage->idx,
314 iShw2);
315 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
316 }
317 }
318 }
319 }
320 }
321 break;
322 }
323
324 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
325 {
326 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
327 const unsigned iShw = off / sizeof(X86PTEPAE);
328 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
329 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
330 {
331 X86PTEPAE GstPte;
332 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
333 AssertRC(rc);
334
335 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
336 pgmPoolTracDerefGCPhysHint(pPool, pPage,
337 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
338 GstPte.u & X86_PTE_PAE_PG_MASK,
339 iShw);
340 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
341 }
342
343 /* paranoia / a bit assumptive. */
344 if ( (off & 7)
345 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
346 {
347 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
348 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
349
350 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
351 {
352 X86PTEPAE GstPte;
353 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
354 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
355 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
356 AssertRC(rc);
357 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
358 pgmPoolTracDerefGCPhysHint(pPool, pPage,
359 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
360 GstPte.u & X86_PTE_PAE_PG_MASK,
361 iShw2);
362 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
363 }
364 }
365 break;
366 }
367
368 case PGMPOOLKIND_32BIT_PD:
369 {
370 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
371 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
372
373 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
374 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
375# ifdef VBOX_WITH_RAW_MODE_NOT_R0
376 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
377 {
378 Assert(pgmMapAreMappingsEnabled(pVM));
379 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
380 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
381 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
382 break;
383 }
384 else
385# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
386 {
387 if (uShw.pPD->a[iShw].n.u1Present)
388 {
389 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
390 pgmPoolFree(pVM,
391 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
392 pPage->idx,
393 iShw);
394 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
395 }
396 }
397 /* paranoia / a bit assumptive. */
398 if ( (off & 3)
399 && (off & 3) + cbWrite > sizeof(X86PTE))
400 {
401 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
402 if ( iShw2 != iShw
403 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
404 {
405# ifdef VBOX_WITH_RAW_MODE_NOT_R0
406 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
407 {
408 Assert(pgmMapAreMappingsEnabled(pVM));
409 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
410 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
411 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
412 break;
413 }
414# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
415 if (uShw.pPD->a[iShw2].n.u1Present)
416 {
417 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
418 pgmPoolFree(pVM,
419 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
420 pPage->idx,
421 iShw2);
422 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
423 }
424 }
425 }
426#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
427 if ( uShw.pPD->a[iShw].n.u1Present
428 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
429 {
430 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
431# ifdef IN_RC /* TLB load - we're pushing things a bit... */
432 ASMProbeReadByte(pvAddress);
433# endif
434 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
435 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
436 }
437#endif
438 break;
439 }
440
441 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
442 {
443 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
444 const unsigned iShw = off / sizeof(X86PDEPAE);
445 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
446#ifdef VBOX_WITH_RAW_MODE_NOT_R0
447 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
448 {
449 Assert(pgmMapAreMappingsEnabled(pVM));
450 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
451 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
452 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
453 break;
454 }
455#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
456 /*
457 * Causes trouble when the guest uses a PDE to refer to the whole page table level
458 * structure. (Invalidate here; faults later on when it tries to change the page
459 * table entries -> recheck; probably only applies to the RC case.)
460 */
461#ifdef VBOX_WITH_RAW_MODE_NOT_R0
462 else
463#endif
464 {
465 if (uShw.pPDPae->a[iShw].n.u1Present)
466 {
467 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
468 pgmPoolFree(pVM,
469 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
470 pPage->idx,
471 iShw);
472 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
473 }
474 }
475 /* paranoia / a bit assumptive. */
476 if ( (off & 7)
477 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
478 {
479 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
480 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
481
482#ifdef VBOX_WITH_RAW_MODE_NOT_R0
483 if ( iShw2 != iShw
484 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
485 {
486 Assert(pgmMapAreMappingsEnabled(pVM));
487 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
488 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
489 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
490 break;
491 }
492 else
493#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
494 if (uShw.pPDPae->a[iShw2].n.u1Present)
495 {
496 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
497 pgmPoolFree(pVM,
498 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
499 pPage->idx,
500 iShw2);
501 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
502 }
503 }
504 break;
505 }
506
507 case PGMPOOLKIND_PAE_PDPT:
508 {
509 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
510 /*
511 * Hopefully this doesn't happen very often:
512 * - touching unused parts of the page
513 * - messing with the bits of pd pointers without changing the physical address
514 */
515 /* PDPT roots are not page aligned; 32 byte only! */
516 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
517
518 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
519 const unsigned iShw = offPdpt / sizeof(X86PDPE);
520 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
521 {
522# ifdef VBOX_WITH_RAW_MODE_NOT_R0
523 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
524 {
525 Assert(pgmMapAreMappingsEnabled(pVM));
526 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
527 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
528 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
529 break;
530 }
531 else
532# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
533 if (uShw.pPDPT->a[iShw].n.u1Present)
534 {
535 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
536 pgmPoolFree(pVM,
537 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
538 pPage->idx,
539 iShw);
540 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
541 }
542
543 /* paranoia / a bit assumptive. */
544 if ( (offPdpt & 7)
545 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
546 {
547 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
548 if ( iShw2 != iShw
549 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
550 {
551# ifdef VBOX_WITH_RAW_MODE_NOT_R0
552 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
553 {
554 Assert(pgmMapAreMappingsEnabled(pVM));
555 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
556 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
557 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
558 break;
559 }
560 else
561# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
562 if (uShw.pPDPT->a[iShw2].n.u1Present)
563 {
564 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
565 pgmPoolFree(pVM,
566 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
567 pPage->idx,
568 iShw2);
569 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
570 }
571 }
572 }
573 }
574 break;
575 }
576
577#ifndef IN_RC
578 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
579 {
580 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
581 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
582 const unsigned iShw = off / sizeof(X86PDEPAE);
583 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
584 if (uShw.pPDPae->a[iShw].n.u1Present)
585 {
586 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
587 pgmPoolFree(pVM,
588 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
589 pPage->idx,
590 iShw);
591 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
592 }
593 /* paranoia / a bit assumptive. */
594 if ( (off & 7)
595 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
596 {
597 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
598 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
599
600 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
601 if (uShw.pPDPae->a[iShw2].n.u1Present)
602 {
603 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
604 pgmPoolFree(pVM,
605 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
606 pPage->idx,
607 iShw2);
608 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
609 }
610 }
611 break;
612 }
613
614 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
615 {
616 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
617 /*
618 * Hopefully this doesn't happen very often:
619 * - messing with the bits of pd pointers without changing the physical address
620 */
621 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
622 const unsigned iShw = off / sizeof(X86PDPE);
623 if (uShw.pPDPT->a[iShw].n.u1Present)
624 {
625 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
626 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
627 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
628 }
629 /* paranoia / a bit assumptive. */
630 if ( (off & 7)
631 && (off & 7) + cbWrite > sizeof(X86PDPE))
632 {
633 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
634 if (uShw.pPDPT->a[iShw2].n.u1Present)
635 {
636 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
637 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
638 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
639 }
640 }
641 break;
642 }
643
644 case PGMPOOLKIND_64BIT_PML4:
645 {
646 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
647 /*
648 * Hopefully this doesn't happen very often:
649 * - messing with the bits of pd pointers without changing the physical address
650 */
651 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
652 const unsigned iShw = off / sizeof(X86PDPE);
653 if (uShw.pPML4->a[iShw].n.u1Present)
654 {
655 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
656 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
657 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
658 }
659 /* paranoia / a bit assumptive. */
660 if ( (off & 7)
661 && (off & 7) + cbWrite > sizeof(X86PDPE))
662 {
663 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
664 if (uShw.pPML4->a[iShw2].n.u1Present)
665 {
666 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
667 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
668 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
669 }
670 }
671 break;
672 }
673#endif /* !IN_RC */
674
675 default:
676 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
677 }
678 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
679
680 /* next */
681 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
682 return;
683 pPage = &pPool->aPages[pPage->iMonitoredNext];
684 }
685}
686
687#ifndef IN_RING3
688
689/**
690 * Checks if an access could be a fork operation in progress.
691 *
692 * Meaning that the guest is setting up the parent process for Copy-On-Write.
693 *
694 * @returns true if it's likely that we're forking, otherwise false.
695 * @param pPool The pool.
696 * @param pDis The disassembled instruction.
697 * @param offFault The access offset.
698 */
699DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
700{
701 /*
702 * i386 linux is using btr to clear X86_PTE_RW.
703 * The functions involved are (2.6.16 source inspection):
704 * clear_bit
705 * ptep_set_wrprotect
706 * copy_one_pte
707 * copy_pte_range
708 * copy_pmd_range
709 * copy_pud_range
710 * copy_page_range
711 * dup_mmap
712 * dup_mm
713 * copy_mm
714 * copy_process
715 * do_fork
716 */
717 if ( pDis->pCurInstr->uOpcode == OP_BTR
718 && !(offFault & 4)
719 /** @todo Validate that the bit index is X86_PTE_RW. */
720 )
721 {
722 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
723 return true;
724 }
725 return false;
726}
727
728
729/**
730 * Determine whether the page is likely to have been reused.
731 *
732 * @returns true if we consider the page as being reused for a different purpose.
733 * @returns false if we consider it to still be a paging page.
734 * @param pVM The cross context VM structure.
735 * @param pVCpu The cross context virtual CPU structure.
736 * @param pRegFrame Trap register frame.
737 * @param pDis The disassembly info for the faulting instruction.
738 * @param pvFault The fault address.
739 * @param pPage The pool page being accessed.
740 *
741 * @remark The REP prefix check is left to the caller because of STOSD/W.
742 */
743DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
744 PPGMPOOLPAGE pPage)
745{
746 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
747 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
748 if (pPage->cLocked)
749 {
750 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused, because it's locked!\n", pvFault, pPage));
751 return false;
752 }
753
754# ifndef IN_RC
755 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
756 if ( HMHasPendingIrq(pVM)
757 && (pRegFrame->rsp - pvFault) < 32)
758 {
759 /* Fault caused by stack writes while trying to inject an interrupt event. */
760 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
761 return true;
762 }
763# else
764 NOREF(pVM); NOREF(pvFault);
765# endif
766
767 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
768
769 /* Non-supervisor mode write means it's used for something else. */
770 if (CPUMGetGuestCPL(pVCpu) == 3)
771 return true;
772
773 switch (pDis->pCurInstr->uOpcode)
774 {
775 /* call implies the actual push of the return address faulted */
776 case OP_CALL:
777 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
778 return true;
779 case OP_PUSH:
780 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
781 return true;
782 case OP_PUSHF:
783 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
784 return true;
785 case OP_PUSHA:
786 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
787 return true;
788 case OP_FXSAVE:
789 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
790 return true;
791 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
792 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
793 return true;
794 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
795 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
796 return true;
797 case OP_MOVSWD:
798 case OP_STOSWD:
799 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
800 && pRegFrame->rcx >= 0x40
801 )
802 {
803 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
804
805 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
806 return true;
807 }
808 break;
809
810 default:
811 /*
812 * Anything having ESP on the left side means stack writes.
813 */
814 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
815 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
816 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
817 {
818 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
819 return true;
820 }
821 break;
822 }
823
824 /*
825 * Page table updates are very unlikely to cross page boundaries,
826 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
827 */
828 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
829 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
830 {
831 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
832 return true;
833 }
834
835 /*
836 * Nobody does an unaligned 8 byte write to a page table, right.
837 */
838 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
839 {
840 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
841 return true;
842 }
843
844 return false;
845}
846
847
848/**
849 * Flushes the page being accessed.
850 *
851 * @returns VBox status code suitable for scheduling.
852 * @param pVM The cross context VM structure.
853 * @param pVCpu The cross context virtual CPU structure.
854 * @param pPool The pool.
855 * @param pPage The pool page (head).
856 * @param pDis The disassembly of the write instruction.
857 * @param pRegFrame The trap register frame.
858 * @param GCPhysFault The fault address as guest physical address.
859 * @param pvFault The fault address.
860 * @todo VBOXSTRICTRC
861 */
862static int pgmRZPoolAccessPfHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
863 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
864{
865 NOREF(pVM); NOREF(GCPhysFault);
866
867 /*
868 * First, do the flushing.
869 */
870 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
871
872 /*
873 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
874 * Must do this in raw mode (!); XP boot will fail otherwise.
875 */
876 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
877 if (rc2 == VINF_SUCCESS)
878 { /* do nothing */ }
879 else if (rc2 == VINF_EM_RESCHEDULE)
880 {
881 if (rc == VINF_SUCCESS)
882 rc = VBOXSTRICTRC_VAL(rc2);
883# ifndef IN_RING3
884 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
885# endif
886 }
887 else if (rc2 == VERR_EM_INTERPRETER)
888 {
889# ifdef IN_RC
890 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
891 {
892 LogFlow(("pgmRZPoolAccessPfHandlerFlush: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
893 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->eip));
894 rc = VINF_SUCCESS;
895 STAM_COUNTER_INC(&pPool->StatMonitorPfRZIntrFailPatch2);
896 }
897 else
898# endif
899 {
900 rc = VINF_EM_RAW_EMULATE_INSTR;
901 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
902 }
903 }
904 else if (RT_FAILURE_NP(rc2))
905 rc = VBOXSTRICTRC_VAL(rc2);
906 else
907 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
908
909 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
910 return rc;
911}
912
913
914/**
915 * Handles the STOSD write accesses.
916 *
917 * @returns VBox status code suitable for scheduling.
918 * @param pVM The cross context VM structure.
919 * @param pPool The pool.
920 * @param pPage The pool page (head).
921 * @param pDis The disassembly of the write instruction.
922 * @param pRegFrame The trap register frame.
923 * @param GCPhysFault The fault address as guest physical address.
924 * @param pvFault The fault address.
925 */
926DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
927 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
928{
929 unsigned uIncrement = pDis->Param1.cb;
930 NOREF(pVM);
931
932 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
933 Assert(pRegFrame->rcx <= 0x20);
934
935# ifdef VBOX_STRICT
936 if (pDis->uOpMode == DISCPUMODE_32BIT)
937 Assert(uIncrement == 4);
938 else
939 Assert(uIncrement == 8);
940# endif
941
942 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
943
944 /*
945 * Increment the modification counter and insert it into the list
946 * of modified pages the first time.
947 */
948 if (!pPage->cModifications++)
949 pgmPoolMonitorModifiedInsert(pPool, pPage);
950
951 /*
952 * Execute REP STOSD.
953 *
954 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
955 * write situation, meaning that it's safe to write here.
956 */
957 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
958 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
959 while (pRegFrame->rcx)
960 {
961# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
962 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
963 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
964 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
965# else
966 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
967# endif
968# ifdef IN_RC
969 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
970# else
971 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
972# endif
973 pu32 += uIncrement;
974 GCPhysFault += uIncrement;
975 pRegFrame->rdi += uIncrement;
976 pRegFrame->rcx--;
977 }
978 pRegFrame->rip += pDis->cbInstr;
979
980 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
981 return VINF_SUCCESS;
982}
983
984
985/**
986 * Handles the simple write accesses.
987 *
988 * @returns VBox status code suitable for scheduling.
989 * @param pVM The cross context VM structure.
990 * @param pVCpu The cross context virtual CPU structure.
991 * @param pPool The pool.
992 * @param pPage The pool page (head).
993 * @param pDis The disassembly of the write instruction.
994 * @param pRegFrame The trap register frame.
995 * @param GCPhysFault The fault address as guest physical address.
996 * @param pvFault The fault address.
997 * @param pfReused Reused state (in/out)
998 */
999DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1000 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1001{
1002 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
1003 NOREF(pVM);
1004 NOREF(pfReused); /* initialized by caller */
1005
1006 /*
1007 * Increment the modification counter and insert it into the list
1008 * of modified pages the first time.
1009 */
1010 if (!pPage->cModifications++)
1011 pgmPoolMonitorModifiedInsert(pPool, pPage);
1012
1013 /*
1014 * Clear all the pages. ASSUMES that pvFault is readable.
1015 */
1016# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1017 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1018# endif
1019
1020 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
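    /* Hand the write to the monitor chain in chunks of at most 8 bytes, i.e. one
       shadow entry at a time (plus a possible straddle into the following entry). */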
1021 if (cbWrite <= 8)
1022 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1023 else if (cbWrite <= 16)
1024 {
1025 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1026 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1027 }
1028 else
1029 {
1030 Assert(cbWrite <= 32);
1031 for (uint32_t off = 0; off < cbWrite; off += 8)
1032 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1033 }
1034
1035# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1036 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1037# endif
1038
1039 /*
1040 * Interpret the instruction.
1041 */
1042 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
1043 if (RT_SUCCESS(rc))
1044 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1045 else if (rc == VERR_EM_INTERPRETER)
1046 {
1047 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
1048 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1049 rc = VINF_EM_RAW_EMULATE_INSTR;
1050 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
1051 }
1052
1053# if 0 /* experimental code */
1054 if (rc == VINF_SUCCESS)
1055 {
1056 switch (pPage->enmKind)
1057 {
1058 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1059 {
1060 X86PTEPAE GstPte;
1061 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1062 AssertRC(rc);
1063
1064 /* Check the new value written by the guest. If present and with a bogus physical address, then
1065 * it's fairly safe to assume the guest is reusing the PT.
1066 */
1067 if (GstPte.n.u1Present)
1068 {
1069 RTHCPHYS HCPhys = -1;
1070 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1071 if (rc != VINF_SUCCESS)
1072 {
1073 *pfReused = true;
1074 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1075 }
1076 }
1077 break;
1078 }
1079 }
1080 }
1081# endif
1082
1083 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1084 return VBOXSTRICTRC_VAL(rc);
1085}
1086
1087
1088/**
1089 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1090 * \#PF access handler callback for page table pages.}
1091 *
1092 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
1093 */
1094DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVM pVM, PVMCPU pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
1095 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1096{
1097 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
1098 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1099 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1100 unsigned cMaxModifications;
1101 bool fForcedFlush = false;
1102 NOREF(uErrorCode);
1103
1104 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1105
1106 pgmLock(pVM);
1107 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1108 {
1109 /* Pool page changed while we were waiting for the lock; ignore. */
1110 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1111 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1112 pgmUnlock(pVM);
1113 return VINF_SUCCESS;
1114 }
1115# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1116 if (pPage->fDirty)
1117 {
1118 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1119 pgmUnlock(pVM);
1120 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1121 }
1122# endif
1123
1124# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1125 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1126 {
1127 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1128 void *pvGst;
1129 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1130 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1131 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1132 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1133 }
1134# endif
1135
1136 /*
1137 * Disassemble the faulting instruction.
1138 */
1139 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1140 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1141 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1142 {
1143 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1144 pgmUnlock(pVM);
1145 return rc;
1146 }
1147
1148 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1149
1150 /*
1151 * We should ALWAYS have the list head as user parameter. This
1152 * is because we use that page to record the changes.
1153 */
1154 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1155
1156# ifdef IN_RING0
1157 /* Maximum nr of modifications depends on the page type. */
1158 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1159 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1160 cMaxModifications = 4;
1161 else
1162 cMaxModifications = 24;
1163# else
1164 cMaxModifications = 48;
1165# endif
1166
1167 /*
1168 * Incremental page table updates should weigh more than random ones.
1169 * (Only applies when started from offset 0)
1170 */
1171 pVCpu->pgm.s.cPoolAccessHandler++;
1172 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1173 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1174 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1175 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1176 {
1177 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1178 Assert(pPage->cModifications < 32000);
1179 pPage->cModifications = pPage->cModifications * 2;
1180 pPage->GCPtrLastAccessHandlerFault = pvFault;
1181 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1182 if (pPage->cModifications >= cMaxModifications)
1183 {
1184 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1185 fForcedFlush = true;
1186 }
1187 }
1188
1189 if (pPage->cModifications >= cMaxModifications)
1190 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1191
1192 /*
1193 * Check if it's worth dealing with.
1194 */
1195 bool fReused = false;
1196 bool fNotReusedNotForking = false;
1197 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1198 || pgmPoolIsPageLocked(pPage)
1199 )
1200 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1201 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1202 {
1203 /*
1204 * Simple instructions, no REP prefix.
1205 */
1206 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1207 {
1208 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1209 if (fReused)
1210 goto flushPage;
1211
1212 /* A mov instruction to change the first page table entry will be remembered so we can detect
1213 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1214 */
1215 if ( rc == VINF_SUCCESS
1216 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1217 && pDis->pCurInstr->uOpcode == OP_MOV
1218 && (pvFault & PAGE_OFFSET_MASK) == 0)
1219 {
1220 pPage->GCPtrLastAccessHandlerFault = pvFault;
1221 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1222 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1223 /* Make sure we don't kick out a page too quickly. */
1224 if (pPage->cModifications > 8)
1225 pPage->cModifications = 2;
1226 }
1227 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1228 {
1229 /* ignore the 2nd write to this page table entry. */
1230 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1231 }
1232 else
1233 {
1234 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1235 pPage->GCPtrLastAccessHandlerRip = 0;
1236 }
1237
1238 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1239 pgmUnlock(pVM);
1240 return rc;
1241 }
1242
1243 /*
1244 * Windows is frequently doing small memset() operations (netio test 4k+).
1245 * We have to deal with these or we'll kill the cache and performance.
1246 */
1247 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1248 && !pRegFrame->eflags.Bits.u1DF
1249 && pDis->uOpMode == pDis->uCpuMode
1250 && pDis->uAddrMode == pDis->uCpuMode)
1251 {
1252 bool fValidStosd = false;
1253
1254 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1255 && pDis->fPrefix == DISPREFIX_REP
1256 && pRegFrame->ecx <= 0x20
1257 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1258 && !((uintptr_t)pvFault & 3)
1259 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1260 )
1261 {
1262 fValidStosd = true;
1263 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1264 }
1265 else
1266 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1267 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1268 && pRegFrame->rcx <= 0x20
1269 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1270 && !((uintptr_t)pvFault & 7)
1271 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1272 )
1273 {
1274 fValidStosd = true;
1275 }
1276
1277 if (fValidStosd)
1278 {
1279 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1280 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1281 pgmUnlock(pVM);
1282 return rc;
1283 }
1284 }
1285
1286 /* REP prefix, don't bother. */
1287 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1288 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1289 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1290 fNotReusedNotForking = true;
1291 }
1292
1293# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1294 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1295 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1296 */
1297 if ( pPage->cModifications >= cMaxModifications
1298 && !fForcedFlush
1299 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1300 && ( fNotReusedNotForking
1301 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1302 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1303 )
1304 )
1305 {
1306 Assert(!pgmPoolIsPageLocked(pPage));
1307 Assert(pPage->fDirty == false);
1308
1309 /* Flush any monitored duplicates as we will disable write protection. */
1310 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1311 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1312 {
1313 PPGMPOOLPAGE pPageHead = pPage;
1314
1315 /* Find the monitor head. */
1316 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1317 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1318
1319 while (pPageHead)
1320 {
1321 unsigned idxNext = pPageHead->iMonitoredNext;
1322
1323 if (pPageHead != pPage)
1324 {
1325 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1326 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1327 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1328 AssertRC(rc2);
1329 }
1330
1331 if (idxNext == NIL_PGMPOOL_IDX)
1332 break;
1333
1334 pPageHead = &pPool->aPages[idxNext];
1335 }
1336 }
1337
1338 /* The flushing above might fail for locked pages, so double check. */
1339 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1340 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1341 {
1342 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1343
1344 /* Temporarily allow write access to the page table again. */
1345 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1346 if (rc == VINF_SUCCESS)
1347 {
1348 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1349 AssertMsg(rc == VINF_SUCCESS
1350 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1351 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1352 || rc == VERR_PAGE_NOT_PRESENT,
1353 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1354# ifdef VBOX_STRICT
1355 pPage->GCPtrDirtyFault = pvFault;
1356# endif
1357
1358 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1359 pgmUnlock(pVM);
1360 return rc;
1361 }
1362 }
1363 }
1364# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1365
1366 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1367flushPage:
1368 /*
1369 * Not worth it, so flush it.
1370 *
1371 * If we considered it to be reused, don't go back to ring-3
1372 * to emulate failed instructions since we usually cannot
1373 * interpret them. This may be a bit risky, in which case
1374 * the reuse detection must be fixed.
1375 */
1376 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1377 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1378 && fReused)
1379 {
1380 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1381 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1382 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1383 }
1384 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1385 pgmUnlock(pVM);
1386 return rc;
1387}
1388
1389#endif /* !IN_RING3 */
1390
1391/**
1392 * @callback_method_impl{FNPGMPHYSHANDLER,
1393 * Access handler for shadowed page table pages.}
1394 *
1395 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1396 */
1397PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1398pgmPoolAccessHandler(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1399 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1400{
1401 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1402 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1403 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1404 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1405 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1406
1407 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1408
1409 pgmLock(pVM);
1410
1411#ifdef VBOX_WITH_STATISTICS
1412 /*
1413 * Collect stats on the access.
1414 */
1415 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1416 if (cbBuf <= 16 && cbBuf > 0)
1417 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1418 else if (cbBuf >= 17 && cbBuf < 32)
1419 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1420 else if (cbBuf >= 32 && cbBuf < 64)
1421 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1422 else if (cbBuf >= 64)
1423 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1424
1425 uint8_t cbAlign;
1426 switch (pPage->enmKind)
1427 {
1428 default:
1429 cbAlign = 7;
1430 break;
1431 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1432 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1433 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1434 case PGMPOOLKIND_32BIT_PD:
1435 case PGMPOOLKIND_32BIT_PD_PHYS:
1436 cbAlign = 3;
1437 break;
1438 }
1439 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1440 if ((uint8_t)GCPhys & cbAlign)
1441 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1442#endif
1443
1444 /*
1445 * Make sure the pool page wasn't modified by a different CPU.
1446 */
1447 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1448 {
1449 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1450
1451 /* The max modification count before flushing depends on the context and page type. */
1452#ifdef IN_RING3
1453 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1454#else
1455 uint16_t cMaxModifications;
1456 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1457 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1458 cMaxModifications = 4;
1459 else
1460 cMaxModifications = 24;
1461# ifdef IN_RC
1462 cMaxModifications *= 2; /* traps are cheaper than exits. */
1463# endif
1464#endif
1465
1466 /*
1467 * We don't have to be very sophisticated about this since there are relatively few calls here.
1468 * However, we must try our best to detect any non-CPU accesses (disk / networking).
1469 */
1470 if ( ( pPage->cModifications < cMaxModifications
1471 || pgmPoolIsPageLocked(pPage) )
1472 && enmOrigin != PGMACCESSORIGIN_DEVICE
1473 && cbBuf <= 16)
1474 {
1475 /* Clear the shadow entry. */
1476 if (!pPage->cModifications++)
1477 pgmPoolMonitorModifiedInsert(pPool, pPage);
1478
1479 if (cbBuf <= 8)
1480 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1481 else
1482 {
1483 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1484 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1485 }
1486 }
1487 else
1488 {
1489 /* ASSUME that VERR_PGM_POOL_CLEARED can be ignored here and that FFs will deal with it in due time. */
1490 pgmPoolMonitorChainFlush(pPool, pPage);
1491 }
1492
1493 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1494 }
1495 else
1496 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1497 pgmUnlock(pVM);
1498 return VINF_PGM_HANDLER_DO_DEFAULT;
1499}
1500
1501
1502# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1503
1504# if defined(VBOX_STRICT) && !defined(IN_RING3)
1505
1506/**
1507 * Check references to guest physical memory in a PAE / PAE page table.
1508 *
1509 * @param pPool The pool.
1510 * @param pPage The page.
1511 * @param pShwPT The shadow page table (mapping of the page).
1512 * @param pGstPT The guest page table.
1513 */
1514static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1515{
1516 unsigned cErrors = 0;
1517 int LastRc = -1; /* initialized to shut up gcc */
1518 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1519 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1520 PVM pVM = pPool->CTX_SUFF(pVM);
1521
1522#ifdef VBOX_STRICT
1523 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1524 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1525#endif
1526 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1527 {
1528 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1529 {
1530 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1531 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1532 if ( rc != VINF_SUCCESS
1533 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1534 {
1535 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1536 LastPTE = i;
1537 LastRc = rc;
1538 LastHCPhys = HCPhys;
1539 cErrors++;
1540
1541 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1542 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1543 AssertRC(rc);
1544
1545 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1546 {
1547 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1548
1549 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1550 {
1551 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1552
1553 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1554 {
1555 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1556 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1557 {
1558 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1559 }
1560 }
1561
1562 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1563 }
1564 }
1565 }
1566 }
1567 }
1568 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1569}
1570
1571
1572/**
1573 * Check references to guest physical memory in a PAE / 32-bit page table.
1574 *
1575 * @param pPool The pool.
1576 * @param pPage The page.
1577 * @param pShwPT The shadow page table (mapping of the page).
1578 * @param pGstPT The guest page table.
1579 */
1580static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1581{
1582 unsigned cErrors = 0;
1583 int LastRc = -1; /* initialized to shut up gcc */
1584 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1585 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1586 PVM pVM = pPool->CTX_SUFF(pVM);
1587
1588#ifdef VBOX_STRICT
1589 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1590 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1591#endif
1592 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1593 {
1594 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1595 {
1596 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1597 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1598 if ( rc != VINF_SUCCESS
1599 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1600 {
1601 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1602 LastPTE = i;
1603 LastRc = rc;
1604 LastHCPhys = HCPhys;
1605 cErrors++;
1606
1607 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1608 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1609 AssertRC(rc);
1610
1611 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1612 {
1613 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1614
1615 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1616 {
1617 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1618
1619 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1620 {
1621 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1622 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1623 {
1624 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1625 }
1626 }
1627
1628 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1629 }
1630 }
1631 }
1632 }
1633 }
1634 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1635}
1636
1637# endif /* VBOX_STRICT && !IN_RING3 */
1638
1639/**
1640 * Clear references to guest physical memory in a PAE / PAE page table.
1641 *
1642 * @returns nr of changed PTEs
1643 * @param pPool The pool.
1644 * @param pPage The page.
1645 * @param pShwPT The shadow page table (mapping of the page).
1646 * @param pGstPT The guest page table.
1647 * @param pOldGstPT The old cached guest page table.
1648 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1649 * @param pfFlush Flush reused page table (out)
1650 */
1651DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1652 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1653{
1654 unsigned cChanged = 0;
1655
1656#ifdef VBOX_STRICT
1657 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1658 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1659#endif
1660 *pfFlush = false;
1661
1662 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1663 {
1664 /* Check the new value written by the guest. If present and with a bogus physical address, then
1665 * it's fairly safe to assume the guest is reusing the PT.
1666 */
1667 if ( fAllowRemoval
1668 && pGstPT->a[i].n.u1Present)
1669 {
1670 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1671 {
1672 *pfFlush = true;
1673 return ++cChanged;
1674 }
1675 }
1676 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1677 {
1678 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1679 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1680 {
1681#ifdef VBOX_STRICT
1682                RTHCPHYS HCPhys = NIL_RTHCPHYS;
1683 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1684 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1685#endif
1686 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1687 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1688 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1689 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1690
1691 if ( uHostAttr == uGuestAttr
1692 && fHostRW <= fGuestRW)
1693 continue;
1694 }
1695 cChanged++;
1696 /* Something was changed, so flush it. */
1697            Log4(("pgmPoolTrackFlushPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1698 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1699 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1700 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1701 }
1702 }
1703 return cChanged;
1704}
1705
1706
1707/**
1708 * Clear references to guest physical memory in a PAE / 32-bit page table.
1709 *
1710 * @returns nr of changed PTEs
1711 * @param pPool The pool.
1712 * @param pPage The page.
1713 * @param pShwPT The shadow page table (mapping of the page).
1714 * @param pGstPT The guest page table.
1715 * @param pOldGstPT The old cached guest page table.
1716 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1717 * @param pfFlush Flush reused page table (out)
1718 */
1719DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1720 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1721{
1722 unsigned cChanged = 0;
1723
1724#ifdef VBOX_STRICT
1725 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1726 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1727#endif
1728 *pfFlush = false;
1729
1730 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1731 {
1732 /* Check the new value written by the guest. If present and with a bogus physical address, then
1733 * it's fairly safe to assume the guest is reusing the PT.
1734 */
1735 if ( fAllowRemoval
1736 && pGstPT->a[i].n.u1Present)
1737 {
1738 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1739 {
1740 *pfFlush = true;
1741 return ++cChanged;
1742 }
1743 }
1744 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1745 {
1746 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1747 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1748 {
1749#ifdef VBOX_STRICT
1750                RTHCPHYS HCPhys = NIL_RTHCPHYS;
1751 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1752 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1753#endif
1754 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1755 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1756 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1757 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1758
1759 if ( uHostAttr == uGuestAttr
1760 && fHostRW <= fGuestRW)
1761 continue;
1762 }
1763 cChanged++;
1764 /* Something was changed, so flush it. */
1765            Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1766 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1767 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1768 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1769 }
1770 }
1771 return cChanged;
1772}
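
/*
 * Illustrative sketch (not built): the per-entry "keep the shadow PTE" test that both
 * pgmPoolTrackFlushPTPaePae and pgmPoolTrackFlushPTPae32Bit above apply, shown here for the
 * PAE flavour.  The helper name is invented for illustration only.
 */
#if 0 /* illustrative sketch, not built */
DECLINLINE(bool) pgmPoolDirtyPteUnchangedSketch(PGMSHWPTEPAE ShwPte, X86PTEPAE GstPte, X86PTEPAE OldGstPte)
{
    /* A changed page address always forces a deref + clear of the shadow PTE. */
    if ((GstPte.u & X86_PTE_PAE_PG_MASK) != (OldGstPte.u & X86_PTE_PAE_PG_MASK))
        return false;

    /* Otherwise the shadow PTE may be kept if the attributes match and the shadow copy
       is not more writable than what the guest asked for. */
    uint64_t const fAttrs     = X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX;
    uint64_t const uHostAttr  = PGMSHWPTEPAE_GET_U(ShwPte) & fAttrs;
    uint64_t const uGuestAttr = GstPte.u & fAttrs;
    bool     const fHostRW    = !!(PGMSHWPTEPAE_GET_U(ShwPte) & X86_PTE_RW);
    bool     const fGuestRW   = !!(GstPte.u & X86_PTE_RW);
    return uHostAttr == uGuestAttr && fHostRW <= fGuestRW;
}
#endif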
1773
1774
1775/**
1776 * Flush a dirty page
1777 *
1778 * @param pVM The cross context VM structure.
1779 * @param pPool The pool.
1780 * @param idxSlot Dirty array slot index
1781 * @param fAllowRemoval Allow a reused page table to be removed
1782 */
1783static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1784{
1785 PPGMPOOLPAGE pPage;
1786 unsigned idxPage;
1787
1788 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1789 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1790 return;
1791
1792 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1793 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1794 pPage = &pPool->aPages[idxPage];
1795 Assert(pPage->idx == idxPage);
1796 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1797
1798 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1799 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1800
1801#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1802 PVMCPU pVCpu = VMMGetCpu(pVM);
1803 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1804#endif
1805
1806 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1807 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1808 Assert(rc == VINF_SUCCESS);
1809 pPage->fDirty = false;
1810
1811#ifdef VBOX_STRICT
1812 uint64_t fFlags = 0;
1813 RTHCPHYS HCPhys;
1814 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1815 AssertMsg( ( rc == VINF_SUCCESS
1816 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1817 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1818 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1819 || rc == VERR_PAGE_NOT_PRESENT,
1820 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1821#endif
1822
1823 /* Flush those PTEs that have changed. */
1824 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1825 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1826 void *pvGst;
1827 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1828 bool fFlush;
1829 unsigned cChanges;
1830
1831 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1832 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1833 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1834 else
1835 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1836 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1837
1838 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1839 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1840 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1841 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1842
1843 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1844 Assert(pPage->cModifications);
1845 if (cChanges < 4)
1846 pPage->cModifications = 1; /* must use > 0 here */
1847 else
1848 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1849
1850 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1851 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1852 pPool->idxFreeDirtyPage = idxSlot;
1853
1854 pPool->cDirtyPages--;
1855 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1856 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1857 if (fFlush)
1858 {
1859 Assert(fAllowRemoval);
1860 Log(("Flush reused page table!\n"));
1861 pgmPoolFlushPage(pPool, pPage);
1862 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1863 }
1864 else
1865 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1866
1867#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1868 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1869#endif
1870}
1871
1872
1873# ifndef IN_RING3
1874/**
1875 * Add a new dirty page
1876 *
1877 * @param pVM The cross context VM structure.
1878 * @param pPool The pool.
1879 * @param pPage The page.
1880 */
1881void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1882{
1883 unsigned idxFree;
1884
1885 PGM_LOCK_ASSERT_OWNER(pVM);
1886 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1887 Assert(!pPage->fDirty);
1888
1889 idxFree = pPool->idxFreeDirtyPage;
1890 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1891 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1892
1893 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1894 {
1895 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1896 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1897 }
1898 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1899 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1900
1901 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1902
1903 /*
1904 * Make a copy of the guest page table as we require valid GCPhys addresses
1905 * when removing references to physical pages.
1906 * (The HCPhys linear lookup is *extremely* expensive!)
1907 */
1908 void *pvGst;
1909 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1910 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1911# ifdef VBOX_STRICT
1912 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1913 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1914 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1915 else
1916 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1917 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1918# endif
1919 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1920
1921 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1922 pPage->fDirty = true;
1923 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1924 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1925 pPool->cDirtyPages++;
1926
1927 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1928 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1929 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1930 {
1931 unsigned i;
1932 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1933 {
1934 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1935 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1936 {
1937 pPool->idxFreeDirtyPage = idxFree;
1938 break;
1939 }
1940 }
1941 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1942 }
1943
1944 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1945
1946 /*
1947 * Clear all references to this shadow table. See @bugref{7298}.
1948 */
1949 pgmPoolTrackClearPageUsers(pPool, pPage);
1950}
1951# endif /* !IN_RING3 */
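
/*
 * Illustrative sketch (not built): pgmPoolAddDirtyPage and pgmPoolFlushDirtyPage treat
 * aDirtyPages as a small fixed-size table in which uIdx == NIL_PGMPOOL_IDX marks a free slot
 * and idxFreeDirtyPage is only a hint.  The hypothetical helper below shows the free-slot scan
 * both functions perform inline; the name is invented for illustration only.
 */
#if 0 /* illustrative sketch, not built */
DECLINLINE(unsigned) pgmPoolDirtyFindFreeSlotSketch(PPGMPOOL pPool, unsigned idxHint)
{
    /* RT_ELEMENTS(pPool->aDirtyPages) is a power of two (see the AssertCompile above),
       so masking is a cheap way of wrapping around. */
    for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
    {
        unsigned idx = (idxHint + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
        if (pPool->aDirtyPages[idx].uIdx == NIL_PGMPOOL_IDX)
            return idx;
    }
    return RT_ELEMENTS(pPool->aDirtyPages); /* all slots in use */
}
#endif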
1952
1953
1954/**
1955 * Check if the specified page is dirty (not write monitored)
1956 *
1957 * @return dirty or not
1958 * @param pVM The cross context VM structure.
1959 * @param GCPhys Guest physical address
1960 */
1961bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1962{
1963 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1964 PGM_LOCK_ASSERT_OWNER(pVM);
1965 if (!pPool->cDirtyPages)
1966 return false;
1967
1968 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1969
1970 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1971 {
1972 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1973 {
1974 PPGMPOOLPAGE pPage;
1975 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1976
1977 pPage = &pPool->aPages[idxPage];
1978 if (pPage->GCPhys == GCPhys)
1979 return true;
1980 }
1981 }
1982 return false;
1983}
1984
1985
1986/**
1987 * Reset all dirty pages by reinstating page monitoring.
1988 *
1989 * @param pVM The cross context VM structure.
1990 */
1991void pgmPoolResetDirtyPages(PVM pVM)
1992{
1993 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1994 PGM_LOCK_ASSERT_OWNER(pVM);
1995 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1996
1997 if (!pPool->cDirtyPages)
1998 return;
1999
2000 Log(("pgmPoolResetDirtyPages\n"));
2001 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2002 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
2003
2004 pPool->idxFreeDirtyPage = 0;
2005 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2006 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
2007 {
2008 unsigned i;
2009 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2010 {
2011 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2012 {
2013 pPool->idxFreeDirtyPage = i;
2014 break;
2015 }
2016 }
2017 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2018 }
2019
2020 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
2021 return;
2022}
2023
2024
2025/**
2026 * Invalidate the PT entry for the specified page
2027 *
2028 * @param pVM The cross context VM structure.
2029 * @param GCPtrPage Guest page to invalidate
2030 */
2031void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
2032{
2033 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2034 PGM_LOCK_ASSERT_OWNER(pVM);
2035 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2036
2037 if (!pPool->cDirtyPages)
2038 return;
2039
2040 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2041 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2042 {
2043 }
2044}
2045
2046
2047/**
2048 * Flush the dirty page for the specified page table, reinstating page monitoring.
2049 *
2050 * @param pVM The cross context VM structure.
2051 * @param GCPhysPT Physical address of the page table
2052 */
2053void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
2054{
2055 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2056 PGM_LOCK_ASSERT_OWNER(pVM);
2057 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2058 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2059
2060 if (!pPool->cDirtyPages)
2061 return;
2062
2063 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2064
2065 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2066 {
2067 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
2068 {
2069 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
2070
2071 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2072 if (pPage->GCPhys == GCPhysPT)
2073 {
2074 idxDirtyPage = i;
2075 break;
2076 }
2077 }
2078 }
2079
2080 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2081 {
2082 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2083 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2084 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
2085 {
2086 unsigned i;
2087 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2088 {
2089 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2090 {
2091 pPool->idxFreeDirtyPage = i;
2092 break;
2093 }
2094 }
2095 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2096 }
2097 }
2098}
2099
2100# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2101
2102/**
2103 * Inserts a page into the GCPhys hash table.
2104 *
2105 * @param pPool The pool.
2106 * @param pPage The page.
2107 */
2108DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2109{
2110 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2111 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2112 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2113 pPage->iNext = pPool->aiHash[iHash];
2114 pPool->aiHash[iHash] = pPage->idx;
2115}
2116
2117
2118/**
2119 * Removes a page from the GCPhys hash table.
2120 *
2121 * @param pPool The pool.
2122 * @param pPage The page.
2123 */
2124DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2125{
2126 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2127 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2128 if (pPool->aiHash[iHash] == pPage->idx)
2129 pPool->aiHash[iHash] = pPage->iNext;
2130 else
2131 {
2132 uint16_t iPrev = pPool->aiHash[iHash];
2133 for (;;)
2134 {
2135 const int16_t i = pPool->aPages[iPrev].iNext;
2136 if (i == pPage->idx)
2137 {
2138 pPool->aPages[iPrev].iNext = pPage->iNext;
2139 break;
2140 }
2141 if (i == NIL_PGMPOOL_IDX)
2142 {
2143 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2144 break;
2145 }
2146 iPrev = i;
2147 }
2148 }
2149 pPage->iNext = NIL_PGMPOOL_IDX;
2150}
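
/*
 * Illustrative sketch (not built): the GCPhys hash above is a simple separately chained table;
 * aiHash[] holds the head page index of each bucket and the chain runs through
 * PGMPOOLPAGE::iNext.  A plain lookup, which pgmPoolCacheAlloc and
 * pgmPoolMonitorGetPageByGCPhys below perform inline with extra filtering, would look roughly
 * like this hypothetical helper (name invented for illustration only).
 */
#if 0 /* illustrative sketch, not built */
DECLINLINE(PPGMPOOLPAGE) pgmPoolHashLookupSketch(PPGMPOOL pPool, RTGCPHYS GCPhys)
{
    for (unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)]; i != NIL_PGMPOOL_IDX; i = pPool->aPages[i].iNext)
        if (pPool->aPages[i].GCPhys == GCPhys)
            return &pPool->aPages[i];
    return NULL;
}
#endif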
2151
2152
2153/**
2154 * Frees up one cache page.
2155 *
2156 * @returns VBox status code.
2157 * @retval VINF_SUCCESS on success.
2158 * @param pPool The pool.
2159 * @param iUser The user index.
2160 */
2161static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2162{
2163#ifndef IN_RC
2164 const PVM pVM = pPool->CTX_SUFF(pVM);
2165#endif
2166    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
2167 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2168
2169 /*
2170 * Select one page from the tail of the age list.
2171 */
2172 PPGMPOOLPAGE pPage;
2173 for (unsigned iLoop = 0; ; iLoop++)
2174 {
2175 uint16_t iToFree = pPool->iAgeTail;
2176 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2177 iToFree = pPool->aPages[iToFree].iAgePrev;
2178/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2179 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2180 {
2181 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2182 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2183 {
2184 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2185 continue;
2186 iToFree = i;
2187 break;
2188 }
2189 }
2190*/
2191 Assert(iToFree != iUser);
2192 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2193 pPage = &pPool->aPages[iToFree];
2194
2195 /*
2196 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2197 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2198 */
2199 if ( !pgmPoolIsPageLocked(pPage)
2200 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2201 break;
2202 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2203 pgmPoolCacheUsed(pPool, pPage);
2204 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2205 }
2206
2207 /*
2208 * Found a usable page, flush it and return.
2209 */
2210 int rc = pgmPoolFlushPage(pPool, pPage);
2211 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2212 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2213 if (rc == VINF_SUCCESS)
2214 PGM_INVL_ALL_VCPU_TLBS(pVM);
2215 return rc;
2216}
2217
2218
2219/**
2220 * Checks if a kind mismatch is really a page being reused
2221 * or if it's just normal remappings.
2222 * or if it's just a normal remapping.
2223 * @returns true if reused and the cached page (enmKind1) should be flushed
2224 * @returns false if not reused.
2225 * @param enmKind1 The kind of the cached page.
2226 * @param enmKind2 The kind of the requested page.
2227 */
2228static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2229{
2230 switch (enmKind1)
2231 {
2232 /*
2233 * Never reuse them. There is no remapping in non-paging mode.
2234 */
2235 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2236 case PGMPOOLKIND_32BIT_PD_PHYS:
2237 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2238 case PGMPOOLKIND_PAE_PD_PHYS:
2239 case PGMPOOLKIND_PAE_PDPT_PHYS:
2240 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2241 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2242 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2243 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2244 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2245 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2246 return false;
2247
2248 /*
2249 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2250 */
2251 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2252 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2253 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2254 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2255 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2256 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2257 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2258 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2259 case PGMPOOLKIND_32BIT_PD:
2260 case PGMPOOLKIND_PAE_PDPT:
2261 switch (enmKind2)
2262 {
2263 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2264 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2265 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2266 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2267 case PGMPOOLKIND_64BIT_PML4:
2268 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2269 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2270 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2271 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2272 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2273 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2274 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2275 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2276 return true;
2277 default:
2278 return false;
2279 }
2280
2281 /*
2282 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2283 */
2284 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2285 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2286 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2287 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2288 case PGMPOOLKIND_64BIT_PML4:
2289 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2290 switch (enmKind2)
2291 {
2292 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2293 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2294 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2295 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2296 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2297 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2298 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2299 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2300 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2301 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2302 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2303 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2304 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2305 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2306 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2307 return true;
2308 default:
2309 return false;
2310 }
2311
2312 /*
2313 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2314 */
2315 case PGMPOOLKIND_ROOT_NESTED:
2316 return false;
2317
2318 default:
2319 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2320 }
2321}
2322
2323
2324/**
2325 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2326 *
2327 * @returns VBox status code.
2328 * @retval VINF_PGM_CACHED_PAGE on success.
2329 * @retval VERR_FILE_NOT_FOUND if not found.
2330 * @param pPool The pool.
2331 * @param GCPhys The GC physical address of the page we're gonna shadow.
2332 * @param enmKind The kind of mapping.
2333 * @param enmAccess Access type for the mapping (only relevant for big pages)
2334 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2335 * @param iUser The shadow page pool index of the user table. This is
2336 * NIL_PGMPOOL_IDX for root pages.
2337 * @param iUserTable The index into the user table (shadowed). Ignored if
2338 * root page
2339 * @param ppPage Where to store the pointer to the page.
2340 */
2341static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2342 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2343{
2344 /*
2345 * Look up the GCPhys in the hash.
2346 */
2347 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2348 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2349 if (i != NIL_PGMPOOL_IDX)
2350 {
2351 do
2352 {
2353 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2354 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2355 if (pPage->GCPhys == GCPhys)
2356 {
2357 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2358 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2359 && pPage->fA20Enabled == fA20Enabled)
2360 {
2361 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2362 * doesn't flush it in case there are no more free use records.
2363 */
2364 pgmPoolCacheUsed(pPool, pPage);
2365
2366 int rc = VINF_SUCCESS;
2367 if (iUser != NIL_PGMPOOL_IDX)
2368 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2369 if (RT_SUCCESS(rc))
2370 {
2371 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2372 *ppPage = pPage;
2373 if (pPage->cModifications)
2374 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2375 STAM_COUNTER_INC(&pPool->StatCacheHits);
2376 return VINF_PGM_CACHED_PAGE;
2377 }
2378 return rc;
2379 }
2380
2381 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2382 {
2383 /*
2384 * The kind is different. In some cases we should now flush the page
2385 * as it has been reused, but in most cases this is normal remapping
2386 * of PDs as PT or big pages using the GCPhys field in a slightly
2387 * different way than the other kinds.
2388 */
2389 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2390 {
2391 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2392 pgmPoolFlushPage(pPool, pPage);
2393 break;
2394 }
2395 }
2396 }
2397
2398 /* next */
2399 i = pPage->iNext;
2400 } while (i != NIL_PGMPOOL_IDX);
2401 }
2402
2403 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2404 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2405 return VERR_FILE_NOT_FOUND;
2406}
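
/*
 * Illustrative sketch (not built): how a caller typically distinguishes a cache hit from a
 * miss.  The wrapper name is invented for illustration; the real consumer of this return
 * convention is pgmPoolAlloc elsewhere in this file.
 */
#if 0 /* illustrative sketch, not built */
DECLINLINE(int) pgmPoolCacheTryAllocSketch(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess,
                                           bool fA20Enabled, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
{
    int rc = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
    if (rc == VINF_PGM_CACHED_PAGE)
        return rc;                      /* Hit: *ppPage is valid and a user record has been added. */
    if (rc == VERR_FILE_NOT_FOUND)
        return VINF_SUCCESS;            /* Miss: the caller goes on to allocate and shadow a fresh page. */
    return rc;                          /* Other failures (e.g. VERR_PGM_POOL_FLUSHED) are passed up. */
}
#endif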
2407
2408
2409/**
2410 * Inserts a page into the cache.
2411 *
2412 * @param pPool The pool.
2413 * @param pPage The cached page.
2414 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2415 */
2416static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2417{
2418 /*
2419 * Insert into the GCPhys hash if the page is fit for that.
2420 */
2421 Assert(!pPage->fCached);
2422 if (fCanBeCached)
2423 {
2424 pPage->fCached = true;
2425 pgmPoolHashInsert(pPool, pPage);
2426 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2427 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2428 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2429 }
2430 else
2431 {
2432 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2433 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2434 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2435 }
2436
2437 /*
2438 * Insert at the head of the age list.
2439 */
2440 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2441 pPage->iAgeNext = pPool->iAgeHead;
2442 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2443 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2444 else
2445 pPool->iAgeTail = pPage->idx;
2446 pPool->iAgeHead = pPage->idx;
2447}
2448
2449
2450/**
2451 * Flushes a cached page.
2452 *
2453 * @param pPool The pool.
2454 * @param pPage The cached page.
2455 */
2456static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2457{
2458 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2459
2460 /*
2461 * Remove the page from the hash.
2462 */
2463 if (pPage->fCached)
2464 {
2465 pPage->fCached = false;
2466 pgmPoolHashRemove(pPool, pPage);
2467 }
2468 else
2469 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2470
2471 /*
2472 * Remove it from the age list.
2473 */
2474 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2475 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2476 else
2477 pPool->iAgeTail = pPage->iAgePrev;
2478 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2479 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2480 else
2481 pPool->iAgeHead = pPage->iAgeNext;
2482 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2483 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2484}
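
/*
 * Illustrative sketch (not built): the age list maintained by pgmPoolCacheInsert and
 * pgmPoolCacheFlushPage above is a doubly linked LRU list (head = most recently used,
 * tail = next eviction candidate for pgmPoolCacheFreeOne).  "Touching" a page, as
 * pgmPoolCacheUsed does elsewhere in this file, amounts to unlinking it and re-inserting it
 * at the head; the helper name below is invented for illustration only.
 */
#if 0 /* illustrative sketch, not built */
DECLINLINE(void) pgmPoolCacheTouchSketch(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    /* Unlink from the current position (same steps as pgmPoolCacheFlushPage above). */
    if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
    else
        pPool->iAgeTail = pPage->iAgePrev;
    if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
    else
        pPool->iAgeHead = pPage->iAgeNext;

    /* Re-insert at the head (same steps as pgmPoolCacheInsert above). */
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
    pPage->iAgeNext = pPool->iAgeHead;
    if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
        pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
    else
        pPool->iAgeTail = pPage->idx;
    pPool->iAgeHead = pPage->idx;
}
#endif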
2485
2486
2487/**
2488 * Looks for pages sharing the monitor.
2489 *
2490 * @returns Pointer to the head page.
2491 * @returns NULL if not found.
2492 * @param pPool The Pool
2493 * @param pNewPage The page which is going to be monitored.
2494 */
2495static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2496{
2497 /*
2498 * Look up the GCPhys in the hash.
2499 */
2500 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2501 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2502 if (i == NIL_PGMPOOL_IDX)
2503 return NULL;
2504 do
2505 {
2506 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2507 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2508 && pPage != pNewPage)
2509 {
2510 switch (pPage->enmKind)
2511 {
2512 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2513 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2514 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2515 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2516 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2517 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2518 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2519 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2520 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2521 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2522 case PGMPOOLKIND_64BIT_PML4:
2523 case PGMPOOLKIND_32BIT_PD:
2524 case PGMPOOLKIND_PAE_PDPT:
2525 {
2526 /* find the head */
2527 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2528 {
2529 Assert(pPage->iMonitoredPrev != pPage->idx);
2530 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2531 }
2532 return pPage;
2533 }
2534
2535 /* ignore, no monitoring. */
2536 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2537 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2538 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2539 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2540 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2541 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2542 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2543 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2544 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2545 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2546 case PGMPOOLKIND_ROOT_NESTED:
2547 case PGMPOOLKIND_PAE_PD_PHYS:
2548 case PGMPOOLKIND_PAE_PDPT_PHYS:
2549 case PGMPOOLKIND_32BIT_PD_PHYS:
2550 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2551 break;
2552 default:
2553 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2554 }
2555 }
2556
2557 /* next */
2558 i = pPage->iNext;
2559 } while (i != NIL_PGMPOOL_IDX);
2560 return NULL;
2561}
2562
2563
2564/**
2565 * Enables write monitoring of a guest page.
2566 *
2567 * @returns VBox status code.
2568 * @retval VINF_SUCCESS on success.
2569 * @param pPool The pool.
2570 * @param pPage The cached page.
2571 */
2572static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2573{
2574 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2575
2576 /*
2577 * Filter out the relevant kinds.
2578 */
2579 switch (pPage->enmKind)
2580 {
2581 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2582 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2583 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2584 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2585 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2586 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2587 case PGMPOOLKIND_64BIT_PML4:
2588 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2589 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2590 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2591 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2592 case PGMPOOLKIND_32BIT_PD:
2593 case PGMPOOLKIND_PAE_PDPT:
2594 break;
2595
2596 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2597 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2598 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2599 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2600 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2601 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2602 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2603 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2604 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2605 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2606 case PGMPOOLKIND_ROOT_NESTED:
2607 /* Nothing to monitor here. */
2608 return VINF_SUCCESS;
2609
2610 case PGMPOOLKIND_32BIT_PD_PHYS:
2611 case PGMPOOLKIND_PAE_PDPT_PHYS:
2612 case PGMPOOLKIND_PAE_PD_PHYS:
2613 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2614 /* Nothing to monitor here. */
2615 return VINF_SUCCESS;
2616 default:
2617 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2618 }
2619
2620 /*
2621 * Install handler.
2622 */
2623 int rc;
2624 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2625 if (pPageHead)
2626 {
2627 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2628 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2629
2630#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2631 if (pPageHead->fDirty)
2632 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2633#endif
2634
2635 pPage->iMonitoredPrev = pPageHead->idx;
2636 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2637 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2638 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2639 pPageHead->iMonitoredNext = pPage->idx;
2640 rc = VINF_SUCCESS;
2641 }
2642 else
2643 {
2644 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2645 PVM pVM = pPool->CTX_SUFF(pVM);
2646 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2647 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2648 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2649 NIL_RTR3PTR /*pszDesc*/);
2650 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2651 * the heap size should suffice. */
2652 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2653 PVMCPU pVCpu = VMMGetCpu(pVM);
2654 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2655 }
2656 pPage->fMonitored = true;
2657 return rc;
2658}
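
/*
 * Illustrative sketch (not built): shadow pages that map the same guest page share a single
 * physical access handler; pgmPoolMonitorInsert above either registers the handler (first
 * page) or links the page into the existing chain via iMonitoredPrev/iMonitoredNext.  The
 * hypothetical walker below (name invented for illustration) shows how such a chain is
 * traversed from its head page.
 */
#if 0 /* illustrative sketch, not built */
DECLINLINE(unsigned) pgmPoolMonitorCountChainSketch(PPGMPOOL pPool, PPGMPOOLPAGE pPageHead)
{
    Assert(pPageHead->iMonitoredPrev == NIL_PGMPOOL_IDX); /* must be the chain head */
    unsigned cPages = 0;
    for (uint16_t idx = pPageHead->idx; idx != NIL_PGMPOOL_IDX; idx = pPool->aPages[idx].iMonitoredNext)
        cPages++;
    return cPages;
}
#endif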
2659
2660
2661/**
2662 * Disables write monitoring of a guest page.
2663 *
2664 * @returns VBox status code.
2665 * @retval VINF_SUCCESS on success.
2666 * @param pPool The pool.
2667 * @param pPage The cached page.
2668 */
2669static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2670{
2671 /*
2672 * Filter out the relevant kinds.
2673 */
2674 switch (pPage->enmKind)
2675 {
2676 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2677 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2678 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2679 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2680 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2681 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2682 case PGMPOOLKIND_64BIT_PML4:
2683 case PGMPOOLKIND_32BIT_PD:
2684 case PGMPOOLKIND_PAE_PDPT:
2685 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2686 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2687 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2688 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2689 break;
2690
2691 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2692 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2693 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2694 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2695 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2696 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2697 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2698 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2699 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2700 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2701 case PGMPOOLKIND_ROOT_NESTED:
2702 case PGMPOOLKIND_PAE_PD_PHYS:
2703 case PGMPOOLKIND_PAE_PDPT_PHYS:
2704 case PGMPOOLKIND_32BIT_PD_PHYS:
2705 /* Nothing to monitor here. */
2706 Assert(!pPage->fMonitored);
2707 return VINF_SUCCESS;
2708
2709 default:
2710 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2711 }
2712 Assert(pPage->fMonitored);
2713
2714 /*
2715 * Remove the page from the monitored list or uninstall it if last.
2716 */
2717 const PVM pVM = pPool->CTX_SUFF(pVM);
2718 int rc;
2719 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2720 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2721 {
2722 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2723 {
2724 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2725 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2726 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, MMHyperCCToR3(pVM, pNewHead),
2727 MMHyperCCToR0(pVM, pNewHead), MMHyperCCToRC(pVM, pNewHead));
2728
2729 AssertFatalRCSuccess(rc);
2730 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2731 }
2732 else
2733 {
2734 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2735 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2736 {
2737 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2738 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2739 }
2740 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2741 rc = VINF_SUCCESS;
2742 }
2743 }
2744 else
2745 {
2746 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2747 AssertFatalRC(rc);
2748 PVMCPU pVCpu = VMMGetCpu(pVM);
2749 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2750 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2751 }
2752 pPage->fMonitored = false;
2753
2754 /*
2755 * Remove it from the list of modified pages (if in it).
2756 */
2757 pgmPoolMonitorModifiedRemove(pPool, pPage);
2758
2759 return rc;
2760}
2761
2762
2763/**
2764 * Inserts the page into the list of modified pages.
2765 *
2766 * @param pPool The pool.
2767 * @param pPage The page.
2768 */
2769void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2770{
2771 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2772 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2773 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2774 && pPool->iModifiedHead != pPage->idx,
2775 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2776 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2777 pPool->iModifiedHead, pPool->cModifiedPages));
2778
2779 pPage->iModifiedNext = pPool->iModifiedHead;
2780 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2781 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2782 pPool->iModifiedHead = pPage->idx;
2783 pPool->cModifiedPages++;
2784#ifdef VBOX_WITH_STATISTICS
2785 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2786 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2787#endif
2788}
2789
2790
2791/**
2792 * Removes the page from the list of modified pages and resets the
2793 * modification counter.
2794 *
2795 * @param pPool The pool.
2796 * @param pPage The page which is believed to be in the list of modified pages.
2797 */
2798static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2799{
2800 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2801 if (pPool->iModifiedHead == pPage->idx)
2802 {
2803 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2804 pPool->iModifiedHead = pPage->iModifiedNext;
2805 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2806 {
2807 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2808 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2809 }
2810 pPool->cModifiedPages--;
2811 }
2812 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2813 {
2814 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2815 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2816 {
2817 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2818 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2819 }
2820 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2821 pPool->cModifiedPages--;
2822 }
2823 else
2824 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2825 pPage->cModifications = 0;
2826}
2827
2828
2829/**
2830 * Zaps the list of modified pages, resetting their modification counters in the process.
2831 *
2832 * @param pVM The cross context VM structure.
2833 */
2834static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2835{
2836 pgmLock(pVM);
2837 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2838 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2839
2840 unsigned cPages = 0; NOREF(cPages);
2841
2842#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2843 pgmPoolResetDirtyPages(pVM);
2844#endif
2845
2846 uint16_t idx = pPool->iModifiedHead;
2847 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2848 while (idx != NIL_PGMPOOL_IDX)
2849 {
2850 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2851 idx = pPage->iModifiedNext;
2852 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2853 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2854 pPage->cModifications = 0;
2855 Assert(++cPages);
2856 }
2857 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2858 pPool->cModifiedPages = 0;
2859 pgmUnlock(pVM);
2860}
2861
2862
2863/**
2864 * Handle SyncCR3 pool tasks
2865 *
2866 * @returns VBox status code.
2867 * @retval VINF_SUCCESS on success.
2868 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2869 * @param pVCpu The cross context virtual CPU structure.
2870 * @remark Should only be used when monitoring is available, thus placed in
2871 * the PGMPOOL_WITH_MONITORING \#ifdef.
2872 */
2873int pgmPoolSyncCR3(PVMCPU pVCpu)
2874{
2875 PVM pVM = pVCpu->CTX_SUFF(pVM);
2876 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2877
2878 /*
2879 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2880 * Occasionally we will have to clear all the shadow page tables because we wanted
2881 * to monitor a page which was mapped by too many shadowed page tables. This operation
2882     * is sometimes referred to as a 'lightweight flush'.
2883 */
2884# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2885 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2886 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2887# else /* !IN_RING3 */
2888 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2889 {
2890 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2891 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2892
2893 /* Make sure all other VCPUs return to ring 3. */
2894 if (pVM->cCpus > 1)
2895 {
2896 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2897 PGM_INVL_ALL_VCPU_TLBS(pVM);
2898 }
2899 return VINF_PGM_SYNC_CR3;
2900 }
2901# endif /* !IN_RING3 */
2902 else
2903 {
2904 pgmPoolMonitorModifiedClearAll(pVM);
2905
2906 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2907 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2908 {
2909 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2910 return pgmPoolSyncCR3(pVCpu);
2911 }
2912 }
2913 return VINF_SUCCESS;
2914}
2915
2916
2917/**
2918 * Frees up at least one user entry.
2919 *
2920 * @returns VBox status code.
2921 * @retval VINF_SUCCESS on success.
2922 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2923 * @param pPool The pool.
2924 * @param iUser The user index.
2925 */
2926static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2927{
2928 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2929 /*
2930 * Just free cached pages in a braindead fashion.
2931 */
2932 /** @todo walk the age list backwards and free the first with usage. */
2933 int rc = VINF_SUCCESS;
2934 do
2935 {
2936 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2937 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2938 rc = rc2;
2939 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2940 return rc;
2941}
2942
2943
2944/**
2945 * Inserts a page into the cache.
2946 *
2947 * This will create a user node for the page, insert it into the GCPhys
2948 * hash, and insert it into the age list.
2949 *
2950 * @returns VBox status code.
2951 * @retval VINF_SUCCESS if successfully added.
2952 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2953 * @param pPool The pool.
2954 * @param pPage The cached page.
2955 * @param GCPhys The GC physical address of the page we're gonna shadow.
2956 * @param iUser The user index.
2957 * @param iUserTable The user table index.
2958 */
2959DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2960{
2961 int rc = VINF_SUCCESS;
2962 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2963
2964 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2965
2966 if (iUser != NIL_PGMPOOL_IDX)
2967 {
2968#ifdef VBOX_STRICT
2969 /*
2970         * Check that the entry doesn't already exist.
2971 */
2972 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2973 {
2974 uint16_t i = pPage->iUserHead;
2975 do
2976 {
2977 Assert(i < pPool->cMaxUsers);
2978 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2979 i = paUsers[i].iNext;
2980 } while (i != NIL_PGMPOOL_USER_INDEX);
2981 }
2982#endif
2983
2984 /*
2985         * Find a free user node.
2986 */
2987 uint16_t i = pPool->iUserFreeHead;
2988 if (i == NIL_PGMPOOL_USER_INDEX)
2989 {
2990 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2991 if (RT_FAILURE(rc))
2992 return rc;
2993 i = pPool->iUserFreeHead;
2994 }
2995
2996 /*
2997 * Unlink the user node from the free list,
2998 * initialize and insert it into the user list.
2999 */
3000 pPool->iUserFreeHead = paUsers[i].iNext;
3001 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
3002 paUsers[i].iUser = iUser;
3003 paUsers[i].iUserTable = iUserTable;
3004 pPage->iUserHead = i;
3005 }
3006 else
3007 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3008
3009
3010 /*
3011 * Insert into cache and enable monitoring of the guest page if enabled.
3012 *
3013 * Until we implement caching of all levels, including the CR3 one, we'll
3014     * have to make sure we don't try to monitor & cache any recursive reuse of
3015     * a monitored CR3 page. Because all Windows versions do this, we'll
3016     * have to be able to do combined access monitoring, CR3 + PT and
3017     * PD + PT (guest PAE).
3018     *
3019     * Update:
3020     * We're now cooperating with the CR3 monitor if an uncacheable page is found.
3021 */
3022 const bool fCanBeMonitored = true;
3023 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
3024 if (fCanBeMonitored)
3025 {
3026 rc = pgmPoolMonitorInsert(pPool, pPage);
3027 AssertRC(rc);
3028 }
3029 return rc;
3030}
3031
3032
3033/**
3034 * Adds a user reference to a page.
3035 *
3036 * This will move the page to the head of the age list.
3037 *
3038 * @returns VBox status code.
3039 * @retval VINF_SUCCESS if successfully added.
3040 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3041 * @param pPool The pool.
3042 * @param pPage The cached page.
3043 * @param iUser The user index.
3044 * @param iUserTable The user table.
3045 */
3046static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3047{
3048 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3049 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3050 Assert(iUser != NIL_PGMPOOL_IDX);
3051
3052# ifdef VBOX_STRICT
3053 /*
3054     * Check that the entry doesn't already exist. We only allow multiple
3055 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3056 */
3057 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3058 {
3059 uint16_t i = pPage->iUserHead;
3060 do
3061 {
3062 Assert(i < pPool->cMaxUsers);
3063 /** @todo this assertion looks odd... Shouldn't it be && here? */
3064 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3065 i = paUsers[i].iNext;
3066 } while (i != NIL_PGMPOOL_USER_INDEX);
3067 }
3068# endif
3069
3070 /*
3071 * Allocate a user node.
3072 */
3073 uint16_t i = pPool->iUserFreeHead;
3074 if (i == NIL_PGMPOOL_USER_INDEX)
3075 {
3076 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3077 if (RT_FAILURE(rc))
3078 return rc;
3079 i = pPool->iUserFreeHead;
3080 }
3081 pPool->iUserFreeHead = paUsers[i].iNext;
3082
3083 /*
3084 * Initialize the user node and insert it.
3085 */
3086 paUsers[i].iNext = pPage->iUserHead;
3087 paUsers[i].iUser = iUser;
3088 paUsers[i].iUserTable = iUserTable;
3089 pPage->iUserHead = i;
3090
3091# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3092 if (pPage->fDirty)
3093 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3094# endif
3095
3096 /*
3097 * Tell the cache to update its replacement stats for this page.
3098 */
3099 pgmPoolCacheUsed(pPool, pPage);
3100 return VINF_SUCCESS;
3101}
3102
3103
3104/**
3105 * Frees a user record associated with a page.
3106 *
3107 * This does not clear the entry in the user table, it simply returns the
3108 * user record to the chain of free records.
3109 *
3110 * @param pPool The pool.
3111 * @param pPage The shadow page.
3112 * @param iUser The shadow page pool index of the user table.
3113 * @param iUserTable The index into the user table (shadowed).
3114 *
3115 * @remarks Don't call this for root pages.
3116 */
3117static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3118{
3119 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3120 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3121 Assert(iUser != NIL_PGMPOOL_IDX);
3122
3123 /*
3124 * Unlink and free the specified user entry.
3125 */
3126
3127 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3128 uint16_t i = pPage->iUserHead;
3129 if ( i != NIL_PGMPOOL_USER_INDEX
3130 && paUsers[i].iUser == iUser
3131 && paUsers[i].iUserTable == iUserTable)
3132 {
3133 pPage->iUserHead = paUsers[i].iNext;
3134
3135 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3136 paUsers[i].iNext = pPool->iUserFreeHead;
3137 pPool->iUserFreeHead = i;
3138 return;
3139 }
3140
3141 /* General: Linear search. */
3142 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3143 while (i != NIL_PGMPOOL_USER_INDEX)
3144 {
3145 if ( paUsers[i].iUser == iUser
3146 && paUsers[i].iUserTable == iUserTable)
3147 {
3148 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3149 paUsers[iPrev].iNext = paUsers[i].iNext;
3150 else
3151 pPage->iUserHead = paUsers[i].iNext;
3152
3153 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3154 paUsers[i].iNext = pPool->iUserFreeHead;
3155 pPool->iUserFreeHead = i;
3156 return;
3157 }
3158 iPrev = i;
3159 i = paUsers[i].iNext;
3160 }
3161
3162 /* Fatal: didn't find it */
3163 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3164 iUser, iUserTable, pPage->GCPhys));
3165}
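
/*
 * Illustrative sketch (not built): the user records form a singly linked free list headed by
 * iUserFreeHead and threaded through PGMPOOLUSER::iNext, which is what pgmPoolTrackFreeUser
 * above pushes entries back onto.  The hypothetical helper below (name invented for
 * illustration) simply counts the free records.
 */
#if 0 /* illustrative sketch, not built */
DECLINLINE(unsigned) pgmPoolTrackCountFreeUsersSketch(PPGMPOOL pPool)
{
    PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
    unsigned     cFree   = 0;
    for (uint16_t i = pPool->iUserFreeHead; i != NIL_PGMPOOL_USER_INDEX; i = paUsers[i].iNext)
        cFree++;
    return cFree;
}
#endif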
3166
3167
3168#if 0 /* unused */
3169/**
3170 * Gets the entry size of a shadow table.
3171 *
3172 * @param enmKind The kind of page.
3173 *
3174 * @returns The size of the entry in bytes. That is, 4 or 8.
3175 * @returns If the kind is not for a table, an assertion is raised and 0 is
3176 * returned.
3177 */
3178DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3179{
3180 switch (enmKind)
3181 {
3182 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3183 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3184 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3185 case PGMPOOLKIND_32BIT_PD:
3186 case PGMPOOLKIND_32BIT_PD_PHYS:
3187 return 4;
3188
3189 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3190 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3191 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3192 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3193 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3194 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3195 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3196 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3197 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3198 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3199 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3200 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3201 case PGMPOOLKIND_64BIT_PML4:
3202 case PGMPOOLKIND_PAE_PDPT:
3203 case PGMPOOLKIND_ROOT_NESTED:
3204 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3205 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3206 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3207 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3208 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3209 case PGMPOOLKIND_PAE_PD_PHYS:
3210 case PGMPOOLKIND_PAE_PDPT_PHYS:
3211 return 8;
3212
3213 default:
3214 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3215 }
3216}
3217#endif /* unused */
3218
3219#if 0 /* unused */
3220/**
3221 * Gets the entry size of a guest table.
3222 *
3223 * @param enmKind The kind of page.
3224 *
3225 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3226 * @returns If the kind is not for a table, an assertion is raised and 0 is
3227 * returned.
3228 */
3229DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3230{
3231 switch (enmKind)
3232 {
3233 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3234 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3235 case PGMPOOLKIND_32BIT_PD:
3236 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3237 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3238 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3239 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3240 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3241 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3242 return 4;
3243
3244 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3245 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3246 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3247 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3248 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3249 case PGMPOOLKIND_64BIT_PML4:
3250 case PGMPOOLKIND_PAE_PDPT:
3251 return 8;
3252
3253 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3254 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3255 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3256 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3257 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3258 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3259 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3260 case PGMPOOLKIND_ROOT_NESTED:
3261 case PGMPOOLKIND_PAE_PD_PHYS:
3262 case PGMPOOLKIND_PAE_PDPT_PHYS:
3263 case PGMPOOLKIND_32BIT_PD_PHYS:
3264 /** @todo can we return 0? (nobody is calling this...) */
3265 AssertFailed();
3266 return 0;
3267
3268 default:
3269 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3270 }
3271}
3272#endif /* unused */
3273
3274
3275/**
3276 * Checks one shadow page table entry for a mapping of a physical page.
3277 *
3278 * @returns true if the PTE was kept (only updated), false if all relevant PTEs were removed.
3279 *
3280 * @param pVM The cross context VM structure.
3281 * @param pPhysPage The guest page in question.
3282 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3283 * @param iShw The shadow page table index.
3284 * @param iPte The page table entry index; must not be NIL_PGMPOOL_PHYSEXT_IDX_PTE.
3285 */
3286static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3287{
3288 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3289 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3290 bool fRet = false;
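    /* Note: fRet ends up true when the relevant PTE is merely updated (kept) and false
       when it is removed; callers use this to decide whether to keep the tracking data. */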
3291
3292 /*
3293 * Assert sanity.
3294 */
3295 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3296 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3297 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3298
3299 /*
3300 * Then, clear the actual mappings to the page in the shadow PT.
3301 */
3302 switch (pPage->enmKind)
3303 {
3304 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3305 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3306 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3307 {
3308 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3309 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3310 uint32_t u32AndMask = 0;
3311 uint32_t u32OrMask = 0;
3312
3313 if (!fFlushPTEs)
3314 {
3315 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3316 {
3317 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3318 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3319 u32OrMask = X86_PTE_RW;
3320 u32AndMask = UINT32_MAX;
3321 fRet = true;
3322 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3323 break;
3324
3325 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3326 u32OrMask = 0;
3327 u32AndMask = ~X86_PTE_RW;
3328 fRet = true;
3329 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3330 break;
3331 default:
3332 /* (shouldn't be here, will assert below) */
3333 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3334 break;
3335 }
3336 }
3337 else
3338 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3339
3340 /* Update the counter if we're removing references. */
3341 if (!u32AndMask)
3342 {
3343 Assert(pPage->cPresent);
3344 Assert(pPool->cPresent);
3345 pPage->cPresent--;
3346 pPool->cPresent--;
3347 }
3348
3349 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3350 {
3351 X86PTE Pte;
3352
3353 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3354 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3355 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3356 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3357
3358 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3359 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3360 return fRet;
3361 }
3362#ifdef LOG_ENABLED
3363 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3364 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3365 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3366 {
3367 Log(("i=%d cFound=%d\n", i, ++cFound));
3368 }
3369#endif
3370 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3371 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3372 break;
3373 }
3374
3375 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3376 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3377 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3378 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3379 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3380 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3381 {
3382 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3383 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3384 uint64_t u64OrMask = 0;
3385 uint64_t u64AndMask = 0;
3386
3387 if (!fFlushPTEs)
3388 {
3389 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3390 {
3391 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3392 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3393 u64OrMask = X86_PTE_RW;
3394 u64AndMask = UINT64_MAX;
3395 fRet = true;
3396 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3397 break;
3398
3399 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3400 u64OrMask = 0;
3401 u64AndMask = ~(uint64_t)X86_PTE_RW;
3402 fRet = true;
3403 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3404 break;
3405
3406 default:
3407 /* (shouldn't be here, will assert below) */
3408 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3409 break;
3410 }
3411 }
3412 else
3413 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3414
3415 /* Update the counter if we're removing references. */
3416 if (!u64AndMask)
3417 {
3418 Assert(pPage->cPresent);
3419 Assert(pPool->cPresent);
3420 pPage->cPresent--;
3421 pPool->cPresent--;
3422 }
3423
3424 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3425 {
3426 X86PTEPAE Pte;
3427
3428 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3429 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3430 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3431 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3432
3433 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3434 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3435 return fRet;
3436 }
3437#ifdef LOG_ENABLED
3438 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3439 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3440 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3441 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3442 Log(("i=%d cFound=%d\n", i, ++cFound));
3443#endif
3444 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3445 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3446 break;
3447 }
3448
3449#ifdef PGM_WITH_LARGE_PAGES
3450 /* Large page case only. */
3451 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3452 {
3453 Assert(pVM->pgm.s.fNestedPaging);
3454
3455 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3456 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3457
3458 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3459 {
3460 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3461 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3462 pPD->a[iPte].u = 0;
3463 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3464
3465 /* Update the counter as we're removing references. */
3466 Assert(pPage->cPresent);
3467 Assert(pPool->cPresent);
3468 pPage->cPresent--;
3469 pPool->cPresent--;
3470
3471 return fRet;
3472 }
3473# ifdef LOG_ENABLED
3474 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3475 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3476 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3477 Log(("i=%d cFound=%d\n", i, ++cFound));
3478# endif
3479 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3480 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3481 break;
3482 }
3483
3484 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3485 case PGMPOOLKIND_PAE_PD_PHYS:
3486 {
3487 Assert(pVM->pgm.s.fNestedPaging);
3488
3489 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3490 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3491
3492 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3493 {
3494 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3495 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3496 pPD->a[iPte].u = 0;
3497 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3498
3499 /* Update the counter as we're removing references. */
3500 Assert(pPage->cPresent);
3501 Assert(pPool->cPresent);
3502 pPage->cPresent--;
3503 pPool->cPresent--;
3504 return fRet;
3505 }
3506# ifdef LOG_ENABLED
3507 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3508 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3509 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3510 Log(("i=%d cFound=%d\n", i, ++cFound));
3511# endif
3512 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3513 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3514 break;
3515 }
3516#endif /* PGM_WITH_LARGE_PAGES */
3517
3518 default:
3519 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3520 }
3521
3522 /* not reached. */
3523#ifndef _MSC_VER
3524 return fRet;
3525#endif
3526}
3527
3528
3529/**
3530 * Scans one shadow page table for mappings of a physical page.
3531 *
3532 * @param pVM The cross context VM structure.
3533 * @param pPhysPage The guest page in question.
3534 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3535 * @param iShw The shadow page table index.
3536 */
3537static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3538{
3539 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3540
3541 /* We should only come here when there's only one reference to this physical page. */
3542 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3543
3544 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3545 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3546 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3547 if (!fKeptPTEs)
3548 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3549 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3550}
3551
3552
3553/**
3554 * Flushes a list of shadow page tables mapping the same physical page.
3555 *
3556 * @param pVM The cross context VM structure.
3557 * @param pPhysPage The guest page in question.
3558 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3559 * @param iPhysExt The physical cross reference extent list to flush.
3560 */
3561static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3562{
3563 PGM_LOCK_ASSERT_OWNER(pVM);
3564 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3565 bool fKeepList = false;
3566
3567 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3568 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3569
3570 const uint16_t iPhysExtStart = iPhysExt;
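    /* Remember the list head so the whole chain can be linked back onto the free list
       below if none of the entries were kept. */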
3571 PPGMPOOLPHYSEXT pPhysExt;
3572 do
3573 {
3574 Assert(iPhysExt < pPool->cMaxPhysExts);
3575 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3576 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3577 {
3578 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3579 {
3580 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3581 if (!fKeptPTEs)
3582 {
3583 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3584 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3585 }
3586 else
3587 fKeepList = true;
3588 }
3589 }
3590 /* next */
3591 iPhysExt = pPhysExt->iNext;
3592 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3593
3594 if (!fKeepList)
3595 {
3596 /* insert the list into the free list and clear the ram range entry. */
3597 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3598 pPool->iPhysExtFreeHead = iPhysExtStart;
3599 /* Invalidate the tracking data. */
3600 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3601 }
3602
3603 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3604}
3605
3606
3607/**
3608 * Flushes all shadow page table mappings of the given guest page.
3609 *
3610 * This is typically called when the host page backing the guest one has been
3611 * replaced or when the page protection was changed due to a guest access
3612 * caught by the monitoring.
3613 *
3614 * @returns VBox status code.
3615 * @retval VINF_SUCCESS if all references have been successfully cleared.
3616 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3617 * pool cleaning. FF and sync flags are set.
3618 *
3619 * @param pVM The cross context VM structure.
3620 * @param GCPhysPage GC physical address of the page in question
3621 * @param pPhysPage The guest page in question.
3622 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3623 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3624 * flushed; it is NOT touched if this isn't necessary.
3625 * The caller MUST initialize this to @a false.
3626 */
3627int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3628{
3629 PVMCPU pVCpu = VMMGetCpu(pVM);
3630 pgmLock(pVM);
3631 int rc = VINF_SUCCESS;
3632
3633#ifdef PGM_WITH_LARGE_PAGES
3634 /* Is this page part of a large page? */
3635 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3636 {
3637 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3638 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3639
3640 /* Fetch the large page base. */
3641 PPGMPAGE pLargePage;
3642 if (GCPhysBase != GCPhysPage)
3643 {
3644 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3645 AssertFatal(pLargePage);
3646 }
3647 else
3648 pLargePage = pPhysPage;
3649
3650 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3651
3652 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3653 {
3654 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3655 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3656 pVM->pgm.s.cLargePagesDisabled++;
3657
3658 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3659 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
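            /* Note: the recursion takes the regular path below because the PDE type of
               the large page was just set to PGM_PAGE_PDE_TYPE_PDE_DISABLED above. */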
3660
3661 *pfFlushTLBs = true;
3662 pgmUnlock(pVM);
3663 return rc;
3664 }
3665 }
3666#else
3667 NOREF(GCPhysPage);
3668#endif /* PGM_WITH_LARGE_PAGES */
3669
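    /* The 16-bit tracking word holds a reference count and either the owning shadow page
       table index (single reference) or, when the count is PGMPOOL_TD_CREFS_PHYSEXT, the
       index of a physical cross reference extent list (or the overflowed marker). */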
3670 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3671 if (u16)
3672 {
3673 /*
3674 * The zero page is currently screwing up the tracking and we'll
3675 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3676 * is defined, zero pages won't normally be mapped. Some kind of solution
3677 * will be needed for this problem of course, but it will have to wait...
3678 */
3679 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3680 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3681 rc = VINF_PGM_GCPHYS_ALIASED;
3682 else
3683 {
3684# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3685 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3686 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3687 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3688# endif
3689
3690 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3691 {
3692 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3693 pgmPoolTrackFlushGCPhysPT(pVM,
3694 pPhysPage,
3695 fFlushPTEs,
3696 PGMPOOL_TD_GET_IDX(u16));
3697 }
3698 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3699 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3700 else
3701 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3702 *pfFlushTLBs = true;
3703
3704# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3705 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3706# endif
3707 }
3708 }
3709
3710 if (rc == VINF_PGM_GCPHYS_ALIASED)
3711 {
3712 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3713 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3714 rc = VINF_PGM_SYNC_CR3;
3715 }
3716 pgmUnlock(pVM);
3717 return rc;
3718}
3719
3720
3721/**
3722 * Scans all shadow page tables for mappings of a physical page.
3723 *
3724 * This may be slow, but it's most likely more efficient than cleaning
3725 * out the entire page pool / cache.
3726 *
3727 * @returns VBox status code.
3728 * @retval VINF_SUCCESS if all references have been successfully cleared.
3729 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3730 * a page pool cleaning.
3731 *
3732 * @param pVM The cross context VM structure.
3733 * @param pPhysPage The guest page in question.
3734 */
3735int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3736{
3737 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3738 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3739 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3740 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3741
3742 /*
3743 * There is a limit to what makes sense.
3744 */
3745 if ( pPool->cPresent > 1024
3746 && pVM->cCpus == 1)
3747 {
3748 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3749 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3750 return VINF_PGM_GCPHYS_ALIASED;
3751 }
3752
3753 /*
3754 * Iterate all the pages until we've encountered all those in use.
3755 * This is a simple but not quite optimal solution.
3756 */
3757 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3758 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3759 unsigned cLeft = pPool->cUsedPages;
3760 unsigned iPage = pPool->cCurPages;
3761 while (--iPage >= PGMPOOL_IDX_FIRST)
3762 {
3763 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3764 if ( pPage->GCPhys != NIL_RTGCPHYS
3765 && pPage->cPresent)
3766 {
3767 switch (pPage->enmKind)
3768 {
3769 /*
3770 * We only care about shadow page tables.
3771 */
3772 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3773 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3774 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3775 {
3776 unsigned cPresent = pPage->cPresent;
3777 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3778 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3779 if (pPT->a[i].n.u1Present)
3780 {
3781 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3782 {
3783 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3784 pPT->a[i].u = 0;
3785
3786 /* Update the counter as we're removing references. */
3787 Assert(pPage->cPresent);
3788 Assert(pPool->cPresent);
3789 pPage->cPresent--;
3790 pPool->cPresent--;
3791 }
3792 if (!--cPresent)
3793 break;
3794 }
3795 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3796 break;
3797 }
3798
3799 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3800 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3801 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3802 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3803 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3804 {
3805 unsigned cPresent = pPage->cPresent;
3806 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3807 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3808 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3809 {
3810 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3811 {
3812 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3813 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3814
3815 /* Update the counter as we're removing references. */
3816 Assert(pPage->cPresent);
3817 Assert(pPool->cPresent);
3818 pPage->cPresent--;
3819 pPool->cPresent--;
3820 }
3821 if (!--cPresent)
3822 break;
3823 }
3824 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3825 break;
3826 }
3827#ifndef IN_RC
3828 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3829 {
3830 unsigned cPresent = pPage->cPresent;
3831 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3832 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3833 if (pPT->a[i].n.u1Present)
3834 {
3835 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3836 {
3837 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3838 pPT->a[i].u = 0;
3839
3840 /* Update the counter as we're removing references. */
3841 Assert(pPage->cPresent);
3842 Assert(pPool->cPresent);
3843 pPage->cPresent--;
3844 pPool->cPresent--;
3845 }
3846 if (!--cPresent)
3847 break;
3848 }
3849 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3850 break;
3851 }
3852#endif
3853 }
3854 if (!--cLeft)
3855 break;
3856 }
3857 }
3858
3859 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3860 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3861
3862 /*
3863 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3864 */
3865 if (pPool->cPresent > 1024)
3866 {
3867 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3868 return VINF_PGM_GCPHYS_ALIASED;
3869 }
3870
3871 return VINF_SUCCESS;
3872}
3873
3874
3875/**
3876 * Clears the user entry in a user table.
3877 *
3878 * This is used to remove all references to a page when flushing it.
3879 */
3880static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3881{
3882 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3883 Assert(pUser->iUser < pPool->cCurPages);
3884 uint32_t iUserTable = pUser->iUserTable;
3885
3886 /*
3887 * Map the user page. Ignore references made by fictitious pages.
3888 */
3889 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3890 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3891 union
3892 {
3893 uint64_t *pau64;
3894 uint32_t *pau32;
3895 } u;
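    /* The union lets us clear the user entry as either a 32-bit or a 64-bit value,
       depending on the kind of the user (owning) page. */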
3896 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3897 {
3898 Assert(!pUserPage->pvPageR3);
3899 return;
3900 }
3901 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3902
3903
3904 /* Safety precaution in case we change the paging for other modes too in the future. */
3905 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3906
3907#ifdef VBOX_STRICT
3908 /*
3909 * Some sanity checks.
3910 */
3911 switch (pUserPage->enmKind)
3912 {
3913 case PGMPOOLKIND_32BIT_PD:
3914 case PGMPOOLKIND_32BIT_PD_PHYS:
3915 Assert(iUserTable < X86_PG_ENTRIES);
3916 break;
3917 case PGMPOOLKIND_PAE_PDPT:
3918 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3919 case PGMPOOLKIND_PAE_PDPT_PHYS:
3920 Assert(iUserTable < 4);
3921 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3922 break;
3923 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3924 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3925 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3926 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3927 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3928 case PGMPOOLKIND_PAE_PD_PHYS:
3929 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3930 break;
3931 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3932 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3933 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3934 break;
3935 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3936 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3937 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3938 break;
3939 case PGMPOOLKIND_64BIT_PML4:
3940 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3941 /* GCPhys >> PAGE_SHIFT is the index here */
3942 break;
3943 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3944 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3945 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3946 break;
3947
3948 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3949 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3950 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3951 break;
3952
3953 case PGMPOOLKIND_ROOT_NESTED:
3954 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3955 break;
3956
3957 default:
3958 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3959 break;
3960 }
3961#endif /* VBOX_STRICT */
3962
3963 /*
3964 * Clear the entry in the user page.
3965 */
3966 switch (pUserPage->enmKind)
3967 {
3968 /* 32-bit entries */
3969 case PGMPOOLKIND_32BIT_PD:
3970 case PGMPOOLKIND_32BIT_PD_PHYS:
3971 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3972 break;
3973
3974 /* 64-bit entries */
3975 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3976 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3977 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3978 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3979 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3980#ifdef IN_RC
3981 /*
3982 * In 32 bits PAE mode we *must* invalidate the TLB when changing a
3983 * PDPT entry; the CPU fetches them only during cr3 load, so any
3984 * non-present PDPT will continue to cause page faults.
3985 */
3986 ASMReloadCR3();
3987#endif
3988 RT_FALL_THRU();
3989 case PGMPOOLKIND_PAE_PD_PHYS:
3990 case PGMPOOLKIND_PAE_PDPT_PHYS:
3991 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3992 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3993 case PGMPOOLKIND_64BIT_PML4:
3994 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3995 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3996 case PGMPOOLKIND_PAE_PDPT:
3997 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3998 case PGMPOOLKIND_ROOT_NESTED:
3999 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4000 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4001 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
4002 break;
4003
4004 default:
4005 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
4006 }
4007 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
4008}
4009
4010
4011/**
4012 * Clears all users of a page.
4013 */
4014static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4015{
4016 /*
4017 * Free all the user records.
4018 */
4019 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
4020
4021 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4022 uint16_t i = pPage->iUserHead;
4023 while (i != NIL_PGMPOOL_USER_INDEX)
4024 {
4025 /* Clear the entry in the user table. */
4026 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
4027
4028 /* Free it. */
4029 const uint16_t iNext = paUsers[i].iNext;
4030 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4031 paUsers[i].iNext = pPool->iUserFreeHead;
4032 pPool->iUserFreeHead = i;
4033
4034 /* Next. */
4035 i = iNext;
4036 }
4037 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4038}
4039
4040
4041/**
4042 * Allocates a new physical cross reference extent.
4043 *
4044 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
4045 * @param pVM The cross context VM structure.
4046 * @param piPhysExt Where to store the phys ext index.
4047 */
4048PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
4049{
4050 PGM_LOCK_ASSERT_OWNER(pVM);
4051 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4052 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4053 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4054 {
4055 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4056 return NULL;
4057 }
4058 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4059 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4060 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4061 *piPhysExt = iPhysExt;
4062 return pPhysExt;
4063}
4064
4065
4066/**
4067 * Frees a physical cross reference extent.
4068 *
4069 * @param pVM The cross context VM structure.
4070 * @param iPhysExt The extent to free.
4071 */
4072void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
4073{
4074 PGM_LOCK_ASSERT_OWNER(pVM);
4075 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4076 Assert(iPhysExt < pPool->cMaxPhysExts);
4077 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4078 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4079 {
4080 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4081 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4082 }
4083 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4084 pPool->iPhysExtFreeHead = iPhysExt;
4085}
4086
4087
4088/**
4089 * Frees a list of physical cross reference extents.
4090 *
4091 * @param pVM The cross context VM structure.
4092 * @param iPhysExt The index of the first extent in the list to free.
4093 */
4094void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
4095{
4096 PGM_LOCK_ASSERT_OWNER(pVM);
4097 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4098
4099 const uint16_t iPhysExtStart = iPhysExt;
4100 PPGMPOOLPHYSEXT pPhysExt;
4101 do
4102 {
4103 Assert(iPhysExt < pPool->cMaxPhysExts);
4104 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4105 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4106 {
4107 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4108 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4109 }
4110
4111 /* next */
4112 iPhysExt = pPhysExt->iNext;
4113 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4114
4115 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4116 pPool->iPhysExtFreeHead = iPhysExtStart;
4117}
4118
4119
4120/**
4121 * Insert a reference into a list of physical cross reference extents.
4122 *
4123 * @returns The new tracking data for PGMPAGE.
4124 *
4125 * @param pVM The cross context VM structure.
4126 * @param iPhysExt The physical extent index of the list head.
4127 * @param iShwPT The shadow page table index.
4128 * @param iPte Page table entry
4129 *
4130 */
4131static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4132{
4133 PGM_LOCK_ASSERT_OWNER(pVM);
4134 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4135 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4136
4137 /*
4138 * Special common cases.
4139 */
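    /* Each extent holds up to three (pool index, PTE index) pairs and slot 0 is always
       filled when an extent is created, so probe slots 1 and 2 before walking the chain. */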
4140 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4141 {
4142 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4143 paPhysExts[iPhysExt].apte[1] = iPte;
4144 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4145 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4146 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4147 }
4148 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4149 {
4150 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4151 paPhysExts[iPhysExt].apte[2] = iPte;
4152 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4153 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4154 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4155 }
4156 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4157
4158 /*
4159 * General treatment.
4160 */
4161 const uint16_t iPhysExtStart = iPhysExt;
4162 unsigned cMax = 15;
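    /* Walk at most 15 extents (up to 45 slots); beyond that give up and return the
       overflowed marker, which makes pgmPoolTrackUpdateGCPhys fall back to the slow scan. */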
4163 for (;;)
4164 {
4165 Assert(iPhysExt < pPool->cMaxPhysExts);
4166 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4167 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4168 {
4169 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4170 paPhysExts[iPhysExt].apte[i] = iPte;
4171 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4172 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4173 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4174 }
4175 if (!--cMax)
4176 {
4177 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4178 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4179 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4180 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4181 }
4182
4183 /* advance */
4184 iPhysExt = paPhysExts[iPhysExt].iNext;
4185 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4186 break;
4187 }
4188
4189 /*
4190 * Add another extent to the list.
4191 */
4192 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4193 if (!pNew)
4194 {
4195 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4196 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4197 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4198 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4199 }
4200 pNew->iNext = iPhysExtStart;
4201 pNew->aidx[0] = iShwPT;
4202 pNew->apte[0] = iPte;
4203 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4204 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4205}
4206
4207
4208/**
4209 * Add a reference to a guest physical page where extents are in use.
4210 *
4211 * @returns The new tracking data for PGMPAGE.
4212 *
4213 * @param pVM The cross context VM structure.
4214 * @param pPhysPage Pointer to the aPages entry in the ram range.
4215 * @param u16 The ram range flags (top 16-bits).
4216 * @param iShwPT The shadow page table index.
4217 * @param iPte Page table entry
4218 */
4219uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4220{
4221 pgmLock(pVM);
4222 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4223 {
4224 /*
4225 * Convert to extent list.
4226 */
4227 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4228 uint16_t iPhysExt;
4229 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4230 if (pPhysExt)
4231 {
4232 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4233 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4234 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4235 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4236 pPhysExt->aidx[1] = iShwPT;
4237 pPhysExt->apte[1] = iPte;
4238 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4239 }
4240 else
4241 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4242 }
4243 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4244 {
4245 /*
4246 * Insert into the extent list.
4247 */
4248 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4249 }
4250 else
4251 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4252 pgmUnlock(pVM);
4253 return u16;
4254}
4255
4256
4257/**
4258 * Clear references to guest physical memory.
4259 *
4260 * @param pPool The pool.
4261 * @param pPage The page.
4262 * @param pPhysPage Pointer to the aPages entry in the ram range.
4263 * @param iPte Shadow PTE index
4264 */
4265void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4266{
4267 PVM pVM = pPool->CTX_SUFF(pVM);
4268 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4269 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4270
4271 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4272 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4273 {
4274 pgmLock(pVM);
4275
4276 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4277 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4278 do
4279 {
4280 Assert(iPhysExt < pPool->cMaxPhysExts);
4281
4282 /*
4283 * Look for the shadow page and check if it's all freed.
4284 */
4285 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4286 {
4287 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4288 && paPhysExts[iPhysExt].apte[i] == iPte)
4289 {
4290 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4291 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4292
4293 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4294 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4295 {
4296 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4297 pgmUnlock(pVM);
4298 return;
4299 }
4300
4301 /* we can free the node. */
4302 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4303 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4304 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4305 {
4306 /* lonely node */
4307 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4308 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4309 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4310 }
4311 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4312 {
4313 /* head */
4314 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4315 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4316 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4317 }
4318 else
4319 {
4320 /* in list */
4321 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4322 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4323 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4324 }
4325 iPhysExt = iPhysExtNext;
4326 pgmUnlock(pVM);
4327 return;
4328 }
4329 }
4330
4331 /* next */
4332 iPhysExtPrev = iPhysExt;
4333 iPhysExt = paPhysExts[iPhysExt].iNext;
4334 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4335
4336 pgmUnlock(pVM);
4337 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4338 }
4339 else /* nothing to do */
4340 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4341}
4342
4343/**
4344 * Clear references to guest physical memory.
4345 *
4346 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4347 * physical address is assumed to be correct, so the linear search can be
4348 * skipped and we can assert at an earlier point.
4349 *
4350 * @param pPool The pool.
4351 * @param pPage The page.
4352 * @param HCPhys The host physical address corresponding to the guest page.
4353 * @param GCPhys The guest physical address corresponding to HCPhys.
4354 * @param iPte Shadow PTE index
4355 */
4356static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4357{
4358 /*
4359 * Lookup the page and check if it checks out before derefing it.
4360 */
4361 PVM pVM = pPool->CTX_SUFF(pVM);
4362 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4363 if (pPhysPage)
4364 {
4365 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4366#ifdef LOG_ENABLED
4367 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4368 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4369#endif
4370 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4371 {
4372 Assert(pPage->cPresent);
4373 Assert(pPool->cPresent);
4374 pPage->cPresent--;
4375 pPool->cPresent--;
4376 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4377 return;
4378 }
4379
4380 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4381 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4382 }
4383 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4384}
4385
4386
4387/**
4388 * Clear references to guest physical memory.
4389 *
4390 * @param pPool The pool.
4391 * @param pPage The page.
4392 * @param HCPhys The host physical address corresponding to the guest page.
4393 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4394 * @param iPte Shadow pte index
4395 */
4396void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4397{
4398 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4399
4400 /*
4401 * Try the hint first.
4402 */
4403 RTHCPHYS HCPhysHinted;
4404 PVM pVM = pPool->CTX_SUFF(pVM);
4405 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4406 if (pPhysPage)
4407 {
4408 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4409 Assert(HCPhysHinted);
4410 if (HCPhysHinted == HCPhys)
4411 {
4412 Assert(pPage->cPresent);
4413 Assert(pPool->cPresent);
4414 pPage->cPresent--;
4415 pPool->cPresent--;
4416 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4417 return;
4418 }
4419 }
4420 else
4421 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
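        /* Poison value; it only shows up in the assertion message at the bottom if the
           linear search below fails as well. */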
4422
4423 /*
4424 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4425 */
4426 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4427 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4428 while (pRam)
4429 {
4430 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4431 while (iPage-- > 0)
4432 {
4433 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4434 {
4435 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4436 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4437 Assert(pPage->cPresent);
4438 Assert(pPool->cPresent);
4439 pPage->cPresent--;
4440 pPool->cPresent--;
4441 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4442 return;
4443 }
4444 }
4445 pRam = pRam->CTX_SUFF(pNext);
4446 }
4447
4448 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4449}
4450
4451
4452/**
4453 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4454 *
4455 * @param pPool The pool.
4456 * @param pPage The page.
4457 * @param pShwPT The shadow page table (mapping of the page).
4458 * @param pGstPT The guest page table.
4459 */
4460DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4461{
4462 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4463 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4464 {
4465 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4466 if (pShwPT->a[i].n.u1Present)
4467 {
4468 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4469 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4470 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4471 if (!pPage->cPresent)
4472 break;
4473 }
4474 }
4475}
4476
4477
4478/**
4479 * Clear references to guest physical memory in a PAE / 32-bit page table.
4480 *
4481 * @param pPool The pool.
4482 * @param pPage The page.
4483 * @param pShwPT The shadow page table (mapping of the page).
4484 * @param pGstPT The guest page table (just a half one).
4485 */
4486DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4487{
4488 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4489 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4490 {
4491 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4492 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4493 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4494 {
4495 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4496 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4497 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4498 if (!pPage->cPresent)
4499 break;
4500 }
4501 }
4502}
4503
4504
4505/**
4506 * Clear references to guest physical memory in a PAE / PAE page table.
4507 *
4508 * @param pPool The pool.
4509 * @param pPage The page.
4510 * @param pShwPT The shadow page table (mapping of the page).
4511 * @param pGstPT The guest page table.
4512 */
4513DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4514{
4515 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4516 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4517 {
4518 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4519 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4520 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4521 {
4522 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4523 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4524 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4525 if (!pPage->cPresent)
4526 break;
4527 }
4528 }
4529}
4530
4531
4532/**
4533 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4534 *
4535 * @param pPool The pool.
4536 * @param pPage The page.
4537 * @param pShwPT The shadow page table (mapping of the page).
4538 */
4539DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4540{
4541 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4542 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4543 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4544 {
4545 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4546 if (pShwPT->a[i].n.u1Present)
4547 {
4548 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4549 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4550 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4551 if (!pPage->cPresent)
4552 break;
4553 }
4554 }
4555}
4556
4557
4558/**
4559 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4560 *
4561 * @param pPool The pool.
4562 * @param pPage The page.
4563 * @param pShwPT The shadow page table (mapping of the page).
4564 */
4565DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4566{
4567 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4568 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4569 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4570 {
4571 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4572 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4573 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4574 {
4575 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4576 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4577 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4578 if (!pPage->cPresent)
4579 break;
4580 }
4581 }
4582}
4583
4584
4585/**
4586 * Clear references to shadowed pages in an EPT page table.
4587 *
4588 * @param pPool The pool.
4589 * @param pPage The page.
4590 * @param pShwPT The shadow page table (mapping of the page).
4592 */
4593DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4594{
4595 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4596 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4597 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4598 {
4599 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4600 if (pShwPT->a[i].n.u1Present)
4601 {
4602 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4603 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4604 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4605 if (!pPage->cPresent)
4606 break;
4607 }
4608 }
4609}
4610
4611
4612/**
4613 * Clear references to shadowed pages in a 32-bit page directory.
4614 *
4615 * @param pPool The pool.
4616 * @param pPage The page.
4617 * @param pShwPD The shadow page directory (mapping of the page).
4618 */
4619DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4620{
4621 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4622 {
4623 if ( pShwPD->a[i].n.u1Present
4624 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4625 )
4626 {
4627 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4628 if (pSubPage)
4629 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4630 else
4631 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4632 }
4633 }
4634}
4635
4636
4637/**
4638 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4639 *
4640 * @param pPool The pool.
4641 * @param pPage The page.
4642 * @param pShwPD The shadow page directory (mapping of the page).
4643 */
4644DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4645{
4646 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4647 {
4648 if ( pShwPD->a[i].n.u1Present
4649 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4650 {
4651#ifdef PGM_WITH_LARGE_PAGES
4652 if (pShwPD->a[i].b.u1Size)
4653 {
4654 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4655 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4656 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4657 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4658 i);
4659 }
4660 else
4661#endif
4662 {
4663 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4664 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4665 if (pSubPage)
4666 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4667 else
4668 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4669 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4670 }
4671 }
4672 }
4673}
4674
4675
4676/**
4677 * Clear references to shadowed pages in a PAE page directory pointer table.
4678 *
4679 * @param pPool The pool.
4680 * @param pPage The page.
4681 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4682 */
4683DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4684{
4685 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4686 {
4687 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4688 if ( pShwPDPT->a[i].n.u1Present
4689 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4690 )
4691 {
4692 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4693 if (pSubPage)
4694 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4695 else
4696 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4697 }
4698 }
4699}
4700
4701
4702/**
4703 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4704 *
4705 * @param pPool The pool.
4706 * @param pPage The page.
4707 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4708 */
4709DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4710{
4711 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4712 {
4713 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4714 if (pShwPDPT->a[i].n.u1Present)
4715 {
4716 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4717 if (pSubPage)
4718 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4719 else
4720 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4721 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4722 }
4723 }
4724}
4725
4726
4727/**
4728 * Clear references to shadowed pages in a 64-bit level 4 page table.
4729 *
4730 * @param pPool The pool.
4731 * @param pPage The page.
4732 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4733 */
4734DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4735{
4736 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4737 {
4738 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4739 if (pShwPML4->a[i].n.u1Present)
4740 {
4741 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4742 if (pSubPage)
4743 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4744 else
4745 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4746 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4747 }
4748 }
4749}
4750
4751
4752/**
4753 * Clear references to shadowed pages in an EPT page directory.
4754 *
4755 * @param pPool The pool.
4756 * @param pPage The page.
4757 * @param pShwPD The shadow page directory (mapping of the page).
4758 */
4759DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4760{
4761 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4762 {
4763 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4764 if (pShwPD->a[i].n.u1Present)
4765 {
4766#ifdef PGM_WITH_LARGE_PAGES
4767 if (pShwPD->a[i].b.u1Size)
4768 {
4769 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4770 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4771 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4772 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4773 i);
4774 }
4775 else
4776#endif
4777 {
4778 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4779 if (pSubPage)
4780 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4781 else
4782 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4783 }
4784 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4785 }
4786 }
4787}
4788
4789
4790/**
4791 * Clear references to shadowed pages in an EPT page directory pointer table.
4792 *
4793 * @param pPool The pool.
4794 * @param pPage The page.
4795 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4796 */
4797DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4798{
4799 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4800 {
4801 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4802 if (pShwPDPT->a[i].n.u1Present)
4803 {
4804 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4805 if (pSubPage)
4806 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4807 else
4808 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4809 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4810 }
4811 }
4812}
4813
4814
4815/**
4816 * Clears all references made by this page.
4817 *
4818 * This includes other shadow pages and GC physical addresses.
4819 *
4820 * @param pPool The pool.
4821 * @param pPage The page.
4822 */
4823static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4824{
4825 /*
4826 * Map the shadow page and take action according to the page kind.
4827 */
4828 PVM pVM = pPool->CTX_SUFF(pVM);
4829 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4830 switch (pPage->enmKind)
4831 {
4832 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4833 {
4834 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4835 void *pvGst;
4836 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4837 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4838 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4839 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4840 break;
4841 }
4842
4843 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4844 {
4845 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4846 void *pvGst;
4847 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4848 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4849 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4850 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4851 break;
4852 }
4853
4854 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4855 {
4856 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4857 void *pvGst;
4858 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4859 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4860 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4861 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4862 break;
4863 }
4864
4865 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4866 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4867 {
4868 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4869 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4870 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4871 break;
4872 }
4873
4874 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4875 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4876 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4877 {
4878 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4879 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4880 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4881 break;
4882 }
4883
4884 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4885 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4886 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4887 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4888 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4889 case PGMPOOLKIND_PAE_PD_PHYS:
4890 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4891 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4892 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4893 break;
4894
4895 case PGMPOOLKIND_32BIT_PD_PHYS:
4896 case PGMPOOLKIND_32BIT_PD:
4897 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4898 break;
4899
4900 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4901 case PGMPOOLKIND_PAE_PDPT:
4902 case PGMPOOLKIND_PAE_PDPT_PHYS:
4903 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4904 break;
4905
4906 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4907 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4908 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4909 break;
4910
4911 case PGMPOOLKIND_64BIT_PML4:
4912 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4913 break;
4914
4915 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4916 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4917 break;
4918
4919 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4920 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4921 break;
4922
4923 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4924 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4925 break;
4926
4927 default:
4928 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4929 }
4930
4931 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4932 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4933 ASMMemZeroPage(pvShw);
4934 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4935 pPage->fZeroed = true;
4936 Assert(!pPage->cPresent);
4937 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4938}
4939
4940
4941/**
4942 * Flushes a pool page.
4943 *
4944 * This moves the page to the free list after removing all user references to it.
4945 *
4946 * @returns VBox status code.
4947 * @retval VINF_SUCCESS on success.
4948 * @param pPool The pool.
4949 * @param pPage The shadow page.
4950 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!)
4951 */
4952int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4953{
4954 PVM pVM = pPool->CTX_SUFF(pVM);
4955 bool fFlushRequired = false;
4956
4957 int rc = VINF_SUCCESS;
4958 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4959 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4960 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4961
4962 /*
4963 * Reject any attempts at flushing any of the special root pages (shall
4964 * not happen).
4965 */
4966 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4967 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4968 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4969 VINF_SUCCESS);
4970
4971 pgmLock(pVM);
4972
4973 /*
4974 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4975 */
4976 if (pgmPoolIsPageLocked(pPage))
4977 {
4978 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4979 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4980 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4981 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4982 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4983 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4984 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4985 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4986 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4987 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4988 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4989 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4990 pgmUnlock(pVM);
4991 return VINF_SUCCESS;
4992 }
4993
4994#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4995 /* Start a subset so we won't run out of mapping space. */
4996 PVMCPU pVCpu = VMMGetCpu(pVM);
4997 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4998#endif
4999
5000 /*
5001 * Mark the page as being in need of an ASMMemZeroPage().
5002 */
5003 pPage->fZeroed = false;
5004
5005#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5006 if (pPage->fDirty)
5007 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
5008#endif
5009
5010 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
5011 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
5012 fFlushRequired = true;
5013
5014 /*
5015 * Clear the page.
5016 */
5017 pgmPoolTrackClearPageUsers(pPool, pPage);
5018 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
5019 pgmPoolTrackDeref(pPool, pPage);
5020 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
5021
5022 /*
5023 * Flush it from the cache.
5024 */
5025 pgmPoolCacheFlushPage(pPool, pPage);
5026
5027#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
5028 /* Heavy stuff done. */
5029 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
5030#endif
5031
5032 /*
5033 * Deregister the monitoring.
5034 */
5035 if (pPage->fMonitored)
5036 rc = pgmPoolMonitorFlush(pPool, pPage);
5037
5038 /*
5039 * Free the page.
5040 */
5041 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
5042 pPage->iNext = pPool->iFreeHead;
5043 pPool->iFreeHead = pPage->idx;
5044 pPage->enmKind = PGMPOOLKIND_FREE;
5045 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5046 pPage->GCPhys = NIL_RTGCPHYS;
5047 pPage->fReusedFlushPending = false;
5048
5049 pPool->cUsedPages--;
5050
5051 /* Flush the TLBs of all VCPUs if required. */
5052 if ( fFlushRequired
5053 && fFlush)
5054 {
5055 PGM_INVL_ALL_VCPU_TLBS(pVM);
5056 }
5057
5058 pgmUnlock(pVM);
5059 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5060 return rc;
5061}
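
/*
 * Illustrative usage sketch (hypothetical, for illustration only): a caller
 * that flushes a batch of pages can pass fFlush=false and do one combined
 * TLB shoot-down afterwards.  The function name and the apPages/cPages
 * parameters are assumptions made for this sketch.
 */
#if 0 /* illustration only, never compiled */
static void pgmPoolFlushBatchSketch(PPGMPOOL pPool, PPGMPOOLPAGE *apPages, unsigned cPages)
{
    PVM pVM = pPool->CTX_SUFF(pVM);
    for (unsigned i = 0; i < cPages; i++)
        pgmPoolFlushPage(pPool, apPages[i], false /*fFlush*/);  /* defer the TLB flush */
    PGM_INVL_ALL_VCPU_TLBS(pVM);                                /* one shoot-down for the whole batch */
}
#endif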
5062
5063
5064/**
5065 * Frees a usage of a pool page.
5066 *
5067 * The caller is responsible for updating the user table so that it no longer
5068 * references the shadow page.
5069 *
5070 * @param pPool The pool.
5071 * @param pPage The shadow page.
5072 * @param iUser The shadow page pool index of the user table.
5073 * NIL_PGMPOOL_IDX for root pages.
5074 * @param iUserTable The index into the user table (shadowed). Ignored if
5075 * root page.
5076 */
5077void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5078{
5079 PVM pVM = pPool->CTX_SUFF(pVM);
5080
5081 STAM_PROFILE_START(&pPool->StatFree, a);
5082 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5083 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5084 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5085
5086 pgmLock(pVM);
5087 if (iUser != NIL_PGMPOOL_IDX)
5088 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5089 if (!pPage->fCached)
5090 pgmPoolFlushPage(pPool, pPage);
5091 pgmUnlock(pVM);
5092 STAM_PROFILE_STOP(&pPool->StatFree, a);
5093}
5094
5095
5096/**
5097 * Makes one or more free pages available.
5098 *
5099 * @returns VBox status code.
5100 * @retval VINF_SUCCESS on success.
5101 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5102 *
5103 * @param pPool The pool.
5104 * @param enmKind Page table kind
5105 * @param iUser The user of the page.
5106 */
5107static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5108{
5109 PVM pVM = pPool->CTX_SUFF(pVM);
5110 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5111 NOREF(enmKind);
5112
5113 /*
5114 * If the pool isn't fully grown yet, expand it.
5115 */
5116 if ( pPool->cCurPages < pPool->cMaxPages
5117#if defined(IN_RC)
5118 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
5119 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5120 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
5121#endif
5122 )
5123 {
5124 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5125#ifdef IN_RING3
5126 int rc = PGMR3PoolGrow(pVM);
5127#else
5128 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
5129#endif
5130 if (RT_FAILURE(rc))
5131 return rc;
5132 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5133 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5134 return VINF_SUCCESS;
5135 }
5136
5137 /*
5138 * Free one cached page.
5139 */
5140 return pgmPoolCacheFreeOne(pPool, iUser);
5141}
5142
5143
5144/**
5145 * Allocates a page from the pool.
5146 *
5147 * This page may actually be a cached page and not in need of any processing
5148 * on the caller's part.
5149 *
5150 * @returns VBox status code.
5151 * @retval VINF_SUCCESS if a NEW page was allocated.
5152 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5153 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5154 *
5155 * @param pVM The cross context VM structure.
5156 * @param GCPhys The GC physical address of the page we're going to shadow.
5157 * For 4MB and 2MB PD entries, it's the first address the
5158 * shadow PT is covering.
5159 * @param enmKind The kind of mapping.
5160 * @param enmAccess Access type for the mapping (only relevant for big pages)
5161 * @param fA20Enabled Whether the A20 gate is enabled or not.
5162 * @param iUser The shadow page pool index of the user table. Root
5163 * pages should pass NIL_PGMPOOL_IDX.
5164 * @param iUserTable The index into the user table (shadowed). Ignored for
5165 * root pages (iUser == NIL_PGMPOOL_IDX).
5166 * @param fLockPage Lock the page
5167 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5168 */
5169int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5170 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5171{
5172 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5173 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5174 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5175 *ppPage = NULL;
5176 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5177 * (TRPMR3SyncIDT) because of FF priority. Try to fix that?
5178 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5179
5180 pgmLock(pVM);
5181
5182 if (pPool->fCacheEnabled)
5183 {
5184 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5185 if (RT_SUCCESS(rc2))
5186 {
5187 if (fLockPage)
5188 pgmPoolLockPage(pPool, *ppPage);
5189 pgmUnlock(pVM);
5190 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5191 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5192 return rc2;
5193 }
5194 }
5195
5196 /*
5197 * Allocate a new one.
5198 */
5199 int rc = VINF_SUCCESS;
5200 uint16_t iNew = pPool->iFreeHead;
5201 if (iNew == NIL_PGMPOOL_IDX)
5202 {
5203 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5204 if (RT_FAILURE(rc))
5205 {
5206 pgmUnlock(pVM);
5207 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5208 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5209 return rc;
5210 }
5211 iNew = pPool->iFreeHead;
5212 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5213 }
5214
5215 /* unlink the free head */
5216 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5217 pPool->iFreeHead = pPage->iNext;
5218 pPage->iNext = NIL_PGMPOOL_IDX;
5219
5220 /*
5221 * Initialize it.
5222 */
5223 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5224 pPage->enmKind = enmKind;
5225 pPage->enmAccess = enmAccess;
5226 pPage->GCPhys = GCPhys;
5227 pPage->fA20Enabled = fA20Enabled;
5228 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5229 pPage->fMonitored = false;
5230 pPage->fCached = false;
5231 pPage->fDirty = false;
5232 pPage->fReusedFlushPending = false;
5233 pPage->cModifications = 0;
5234 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5235 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5236 pPage->cPresent = 0;
5237 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5238 pPage->idxDirtyEntry = 0;
5239 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5240 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5241 pPage->cLastAccessHandler = 0;
5242 pPage->cLocked = 0;
5243# ifdef VBOX_STRICT
5244 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5245# endif
5246
5247 /*
5248 * Insert into the tracking and cache. If this fails, free the page.
5249 */
5250 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5251 if (RT_FAILURE(rc3))
5252 {
5253 pPool->cUsedPages--;
5254 pPage->enmKind = PGMPOOLKIND_FREE;
5255 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5256 pPage->GCPhys = NIL_RTGCPHYS;
5257 pPage->iNext = pPool->iFreeHead;
5258 pPool->iFreeHead = pPage->idx;
5259 pgmUnlock(pVM);
5260 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5261 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5262 return rc3;
5263 }
5264
5265 /*
5266 * Commit the allocation, clear the page and return.
5267 */
5268#ifdef VBOX_WITH_STATISTICS
5269 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5270 pPool->cUsedPagesHigh = pPool->cUsedPages;
5271#endif
5272
5273 if (!pPage->fZeroed)
5274 {
5275 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5276 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5277 ASMMemZeroPage(pv);
5278 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5279 }
5280
5281 *ppPage = pPage;
5282 if (fLockPage)
5283 pgmPoolLockPage(pPool, pPage);
5284 pgmUnlock(pVM);
5285 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5286 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5287 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5288 return rc;
5289}
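
/*
 * Illustrative usage sketch (hypothetical, for illustration only): allocating
 * a shadow page directory for a 64-bit guest PD and releasing the usage again.
 * The function name, GCPhysGstPd, pShwPdpt and iPdpt are assumptions made for
 * this sketch; fA20Enabled is simply passed as true here.
 */
#if 0 /* illustration only, never compiled */
static int pgmPoolAllocPdSketch(PVM pVM, PPGMPOOLPAGE pShwPdpt, RTGCPHYS GCPhysGstPd, uint32_t iPdpt)
{
    PPGMPOOLPAGE pShwPd;
    int rc = pgmPoolAlloc(pVM, GCPhysGstPd, PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD, PGMPOOLACCESS_DONTCARE,
                          true /*fA20Enabled*/, pShwPdpt->idx, iPdpt, false /*fLockPage*/, &pShwPd);
    if (rc == VINF_PGM_CACHED_PAGE)
        rc = VINF_SUCCESS;              /* an existing shadow PD was found in the cache and reused */
    AssertRCReturn(rc, rc);
    /* ... the caller would now write pShwPd->Core.Key into the shadow PDPT entry ... */

    /* When the PDPT entry is torn down again, the usage is dropped like this: */
    pgmPoolFree(pVM, pShwPd->Core.Key, pShwPdpt->idx, iPdpt);
    return VINF_SUCCESS;
}
#endif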
5290
5291
5292/**
5293 * Frees a usage of a pool page.
5294 *
5295 * @param pVM The cross context VM structure.
5296 * @param HCPhys The HC physical address of the shadow page.
5297 * @param iUser The shadow page pool index of the user table.
5298 * NIL_PGMPOOL_IDX if root page.
5299 * @param iUserTable The index into the user table (shadowed). Ignored if
5300 * root page.
5301 */
5302void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5303{
5304 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5305 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5306 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5307}
5308
5309
5310/**
5311 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5312 *
5313 * @returns Pointer to the shadow page structure.
5314 * @param pPool The pool.
5315 * @param HCPhys The HC physical address of the shadow page.
5316 */
5317PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5318{
5319 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5320
5321 /*
5322 * Look up the page.
5323 */
5324 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5325
5326 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5327 return pPage;
5328}
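
/*
 * Illustrative usage sketch (hypothetical, for illustration only): mapping the
 * current shadow CR3 back to its pool page and logging its kind.  Assumes the
 * caller owns the PGM lock; the function name is made up for this sketch.
 */
#if 0 /* illustration only, never compiled */
static void pgmPoolLogShadowCr3Sketch(PVM pVM)
{
    PPGMPOOL     pPool = pVM->pgm.s.CTX_SUFF(pPool);
    PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, PGMGetHyperCR3(VMMGetCpu(pVM)));
    Log(("shadow CR3 pool page: idx=%d kind=%s GCPhys=%RGp\n",
         pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
}
#endif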
5329
5330
5331/**
5332 * Internal worker for finding a page for debugging purposes, no assertions.
5333 *
5334 * @returns Pointer to the shadow page structure. NULL if not found.
5335 * @param pPool The pool.
5336 * @param HCPhys The HC physical address of the shadow page.
5337 */
5338PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5339{
5340 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5341 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5342}
5343
5344#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5345
5346/**
5347 * Flushes the specified page if present.
5348 *
5349 * @param pVM The cross context VM structure.
5350 * @param GCPhys Guest physical address of the page to flush
5351 */
5352void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5353{
5354 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5355
5356 VM_ASSERT_EMT(pVM);
5357
5358 /*
5359 * Look up the GCPhys in the hash.
5360 */
5361 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5362 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5363 if (i == NIL_PGMPOOL_IDX)
5364 return;
5365
5366 do
5367 {
5368 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5369 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5370 {
5371 switch (pPage->enmKind)
5372 {
5373 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5374 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5375 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5376 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5377 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5378 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5379 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5380 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5381 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5382 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5383 case PGMPOOLKIND_64BIT_PML4:
5384 case PGMPOOLKIND_32BIT_PD:
5385 case PGMPOOLKIND_PAE_PDPT:
5386 {
5387 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5388#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5389 if (pPage->fDirty)
5390 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5391 else
5392#endif
5393 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5394 Assert(!pgmPoolIsPageLocked(pPage));
5395 pgmPoolMonitorChainFlush(pPool, pPage);
5396 return;
5397 }
5398
5399 /* ignore, no monitoring. */
5400 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5401 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5402 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5403 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5404 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5405 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5406 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5407 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5408 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5409 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5410 case PGMPOOLKIND_ROOT_NESTED:
5411 case PGMPOOLKIND_PAE_PD_PHYS:
5412 case PGMPOOLKIND_PAE_PDPT_PHYS:
5413 case PGMPOOLKIND_32BIT_PD_PHYS:
5414 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5415 break;
5416
5417 default:
5418 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5419 }
5420 }
5421
5422 /* next */
5423 i = pPage->iNext;
5424 } while (i != NIL_PGMPOOL_IDX);
5425 return;
5426}
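
/*
 * Illustrative usage sketch (hypothetical, for illustration only): a ring-3
 * caller dropping any monitored shadow page derived from a guest page it is
 * about to repurpose.  The function name and GCPhysGst are assumptions; the
 * lock is taken here on the assumption that the caller does not hold it yet.
 */
#if 0 /* illustration only, never compiled */
static void pgmR3PoolFlushGuestPageSketch(PVM pVM, RTGCPHYS GCPhysGst)
{
    pgmLock(pVM);
    pgmPoolFlushPageByGCPhys(pVM, GCPhysGst);   /* harmless no-op if nothing shadows this page */
    pgmUnlock(pVM);
}
#endif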
5427
5428#endif /* IN_RING3 */
5429#ifdef IN_RING3
5430
5431/**
5432 * Reset CPU on hot plugging.
5433 *
5434 * @param pVM The cross context VM structure.
5435 * @param pVCpu The cross context virtual CPU structure.
5436 */
5437void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5438{
5439 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5440
5441 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5442 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5443 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5444}
5445
5446
5447/**
5448 * Flushes the entire cache.
5449 *
5450 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5451 * this and will execute the CR3 flush.
5452 *
5453 * @param pVM The cross context VM structure.
5454 */
5455void pgmR3PoolReset(PVM pVM)
5456{
5457 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5458
5459 PGM_LOCK_ASSERT_OWNER(pVM);
5460 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5461 LogFlow(("pgmR3PoolReset:\n"));
5462
5463 /*
5464 * If there are no pages in the pool, there is nothing to do.
5465 */
5466 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5467 {
5468 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5469 return;
5470 }
5471
5472 /*
5473 * Exit the shadow mode since we're going to clear everything,
5474 * including the root page.
5475 */
5476 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5477 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5478
5479 /*
5480 * Nuke the free list and reinsert all pages into it.
5481 */
5482 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5483 {
5484 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5485
5486 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5487 if (pPage->fMonitored)
5488 pgmPoolMonitorFlush(pPool, pPage);
5489 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5490 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5491 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5492 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5493 pPage->GCPhys = NIL_RTGCPHYS;
5494 pPage->enmKind = PGMPOOLKIND_FREE;
5495 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5496 Assert(pPage->idx == i);
5497 pPage->iNext = i + 1;
5498 pPage->fA20Enabled = true;
5499 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5500 pPage->fSeenNonGlobal = false;
5501 pPage->fMonitored = false;
5502 pPage->fDirty = false;
5503 pPage->fCached = false;
5504 pPage->fReusedFlushPending = false;
5505 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5506 pPage->cPresent = 0;
5507 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5508 pPage->cModifications = 0;
5509 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5510 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5511 pPage->idxDirtyEntry = 0;
5512 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5513 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5514 pPage->cLastAccessHandler = 0;
5515 pPage->cLocked = 0;
5516#ifdef VBOX_STRICT
5517 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5518#endif
5519 }
5520 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5521 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5522 pPool->cUsedPages = 0;
5523
5524 /*
5525 * Zap and reinitialize the user records.
5526 */
5527 pPool->cPresent = 0;
5528 pPool->iUserFreeHead = 0;
5529 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5530 const unsigned cMaxUsers = pPool->cMaxUsers;
5531 for (unsigned i = 0; i < cMaxUsers; i++)
5532 {
5533 paUsers[i].iNext = i + 1;
5534 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5535 paUsers[i].iUserTable = 0xfffffffe;
5536 }
5537 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5538
5539 /*
5540 * Clear all the GCPhys links and rebuild the phys ext free list.
5541 */
5542 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5543 pRam;
5544 pRam = pRam->CTX_SUFF(pNext))
5545 {
5546 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5547 while (iPage-- > 0)
5548 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5549 }
5550
5551 pPool->iPhysExtFreeHead = 0;
5552 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5553 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5554 for (unsigned i = 0; i < cMaxPhysExts; i++)
5555 {
5556 paPhysExts[i].iNext = i + 1;
5557 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5558 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5559 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5560 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5561 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5562 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5563 }
5564 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5565
5566 /*
5567 * Just zap the modified list.
5568 */
5569 pPool->cModifiedPages = 0;
5570 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5571
5572 /*
5573 * Clear the GCPhys hash and the age list.
5574 */
5575 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5576 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5577 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5578 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5579
5580#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5581 /* Clear all dirty pages. */
5582 pPool->idxFreeDirtyPage = 0;
5583 pPool->cDirtyPages = 0;
5584 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5585 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5586#endif
5587
5588 /*
5589 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5590 */
5591 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5592 {
5593 /*
5594 * Re-enter the shadowing mode and assert Sync CR3 FF.
5595 */
5596 PVMCPU pVCpu = &pVM->aCpus[i];
5597 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5598 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5599 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5600 }
5601
5602 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5603}
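
/*
 * Illustrative usage sketch (hypothetical, for illustration only): driving a
 * full pool reset from ring-3.  pgmR3PoolReset asserts PGM lock ownership and
 * leaves VMCPU_FF_PGM_SYNC_CR3 / VMCPU_FF_TLB_FLUSH set, so the caller relies
 * on the normal FF processing to rebuild the shadow paging state afterwards.
 * The wrapper name is an assumption made for this sketch.
 */
#if 0 /* illustration only, never compiled */
static void pgmR3PoolResetSketch(PVM pVM)
{
    pgmLock(pVM);
    pgmR3PoolReset(pVM);    /* flushes every shadow page and re-enters the shadow mode on each VCPU */
    pgmUnlock(pVM);
}
#endif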
5604
5605#endif /* IN_RING3 */
5606
5607#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5608/**
5609 * Stringifies a PGMPOOLKIND value.
5610 */
5611static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5612{
5613 switch ((PGMPOOLKIND)enmKind)
5614 {
5615 case PGMPOOLKIND_INVALID:
5616 return "PGMPOOLKIND_INVALID";
5617 case PGMPOOLKIND_FREE:
5618 return "PGMPOOLKIND_FREE";
5619 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5620 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5621 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5622 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5623 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5624 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5625 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5626 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5627 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5628 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5629 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5630 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5631 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5632 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5633 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5634 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5635 case PGMPOOLKIND_32BIT_PD:
5636 return "PGMPOOLKIND_32BIT_PD";
5637 case PGMPOOLKIND_32BIT_PD_PHYS:
5638 return "PGMPOOLKIND_32BIT_PD_PHYS";
5639 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5640 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5641 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5642 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5643 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5644 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5645 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5646 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5647 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5648 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5649 case PGMPOOLKIND_PAE_PD_PHYS:
5650 return "PGMPOOLKIND_PAE_PD_PHYS";
5651 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5652 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5653 case PGMPOOLKIND_PAE_PDPT:
5654 return "PGMPOOLKIND_PAE_PDPT";
5655 case PGMPOOLKIND_PAE_PDPT_PHYS:
5656 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5657 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5658 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5659 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5660 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5661 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5662 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5663 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5664 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5665 case PGMPOOLKIND_64BIT_PML4:
5666 return "PGMPOOLKIND_64BIT_PML4";
5667 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5668 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5669 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5670 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5671 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5672 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5673 case PGMPOOLKIND_ROOT_NESTED:
5674 return "PGMPOOLKIND_ROOT_NESTED";
5675 }
5676 return "Unknown kind!";
5677}
5678#endif /* LOG_ENABLED || VBOX_STRICT */
5679