VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@22343

Last change on this file was 22343, checked in by vboxsync on 2009-08-19:

Attempt to detect full page table initialization early on.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 166.4 KB
1/* $Id: PGMAllPool.cpp 22343 2009-08-19 12:40:16Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48RT_C_DECLS_BEGIN
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
56static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
57#endif
58#ifdef PGMPOOL_WITH_CACHE
59static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
60#endif
61#ifdef PGMPOOL_WITH_MONITORING
62static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
63#endif
64#ifndef IN_RING3
65DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
66#endif
67#ifdef LOG_ENABLED
68static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
69#endif
70
71void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs);
72void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt);
73int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
74PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
75void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
76void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
77
78RT_C_DECLS_END
79
80
81/**
82 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
83 *
84 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
85 * @param enmKind The page kind.
86 */
87DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
88{
89 switch (enmKind)
90 {
91 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
92 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
93 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
94 return true;
95 default:
96 return false;
97 }
98}
99
100/** @def PGMPOOL_PAGE_2_LOCKED_PTR
101 * Maps a pool page into the current context and locks it (RC only).
102 *
103 * @returns VBox status code.
104 * @param pVM The VM handle.
105 * @param pPage The pool page.
106 *
107 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
108 * small page window employed by that function. Be careful.
109 * @remark There is no need to assert on the result.
110 */
111#if defined(IN_RC)
112DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
113{
114 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
115
116 /* Make sure the dynamic mapping will not be reused. */
117 if (pv)
118 PGMDynLockHCPage(pVM, (uint8_t *)pv);
119
120 return pv;
121}
122#else
123# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
124#endif
125
126/** @def PGMPOOL_UNLOCK_PTR
127 * Unlocks a previously locked dynamic cache mapping (RC only).
128 *
129 * @returns VBox status code.
130 * @param pVM The VM handle.
131 * @param pPage The pool page.
132 *
133 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
134 * small page window employed by that function. Be careful.
135 * @remark There is no need to assert on the result.
136 */
137#if defined(IN_RC)
138DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
139{
140 if (pvPage)
141 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
142}
143#else
144# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
145#endif
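/*
 * Minimal usage sketch of the two macros above (the names pShwPT and iEntry are
 * hypothetical): map and lock the shadow page, update an entry, then release the
 * RC dynamic mapping again. This mirrors the pattern used by the monitor code below.
 *
 *      PX86PT pShwPT = (PX86PT)PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
 *      if (pShwPT && pShwPT->a[iEntry].n.u1Present)
 *          ASMAtomicWriteSize(&pShwPT->a[iEntry], 0);
 *      PGMPOOL_UNLOCK_PTR(pVM, pShwPT);
 */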
146
147
148#ifdef PGMPOOL_WITH_MONITORING
149/**
150 * Determine the size of a write instruction.
151 * @returns number of bytes written.
152 * @param pDis The disassembler state.
153 */
154static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
155{
156 /*
157 * This is very crude and possibly wrong for some opcodes,
158 * but since it's not really supposed to be called we can
159 * probably live with that.
160 */
161 return DISGetParamSize(pDis, &pDis->param1);
162}
163
164
165/**
166 * Flushes a chain of pages sharing the same access monitor.
167 *
168 * @returns VBox status code suitable for scheduling.
169 * @param pPool The pool.
170 * @param pPage A page in the chain.
171 */
172int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
173{
174 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
175
176 /*
177 * Find the list head.
178 */
179 uint16_t idx = pPage->idx;
180 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
181 {
182 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
183 {
184 idx = pPage->iMonitoredPrev;
185 Assert(idx != pPage->idx);
186 pPage = &pPool->aPages[idx];
187 }
188 }
189
190 /*
191 * Iterate the list flushing each shadow page.
192 */
193 int rc = VINF_SUCCESS;
194 for (;;)
195 {
196 idx = pPage->iMonitoredNext;
197 Assert(idx != pPage->idx);
198 if (pPage->idx >= PGMPOOL_IDX_FIRST)
199 {
200 int rc2 = pgmPoolFlushPage(pPool, pPage);
201 AssertRC(rc2);
202 }
203 /* next */
204 if (idx == NIL_PGMPOOL_IDX)
205 break;
206 pPage = &pPool->aPages[idx];
207 }
208 return rc;
209}
210
211
212/**
213 * Wrapper for getting the current context pointer to the entry being modified.
214 *
215 * @returns VBox status code suitable for scheduling.
216 * @param pVM VM Handle.
217 * @param pvDst Destination address
218 * @param pvSrc Source guest virtual address.
219 * @param GCPhysSrc The source guest physical address.
220 * @param cb Size of data to read
221 */
222DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
223{
224#if defined(IN_RING3)
225 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
226 return VINF_SUCCESS;
227#else
228 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
229 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
230#endif
231}
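/*
 * Note (descriptive, added for clarity): the "& ~(cb - 1)" masking above assumes cb
 * is a power of two (4 or 8 for the PTE sizes the callers pass) and aligns the source
 * down to an entry boundary, so the whole guest entry is fetched even when the
 * faulting write hit the middle of it.
 */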
232
233/**
234 * Process shadow entries before they are changed by the guest.
235 *
236 * For PT entries we will clear them. For PD entries, we'll simply check
237 * for mapping conflicts and set the SyncCR3 FF if found.
238 *
239 * @param pVCpu VMCPU handle
240 * @param pPool The pool.
241 * @param pPage The head page.
242 * @param GCPhysFault The guest physical fault address.
243 * @param uAddress In R0 and GC this is the guest context fault address (flat).
244 * In R3 this is the host context 'fault' address.
245 * @param pDis The disassembler state for figuring out the write size.
246 * This need not be specified if the caller knows we won't do cross entry accesses.
247 */
248void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pDis)
249{
250 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
251 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
252 const unsigned cbWrite = pDis ? pgmPoolDisasWriteSize(pDis) : 0;
253 PVM pVM = pPool->CTX_SUFF(pVM);
254
255 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%s cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
256 for (;;)
257 {
258 union
259 {
260 void *pv;
261 PX86PT pPT;
262 PX86PTPAE pPTPae;
263 PX86PD pPD;
264 PX86PDPAE pPDPae;
265 PX86PDPT pPDPT;
266 PX86PML4 pPML4;
267 } uShw;
268
269 uShw.pv = NULL;
270 switch (pPage->enmKind)
271 {
272 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
273 {
274 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
275 const unsigned iShw = off / sizeof(X86PTE);
276 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
277 if (uShw.pPT->a[iShw].n.u1Present)
278 {
279# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
280 X86PTE GstPte;
281
282 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
283 AssertRC(rc);
284 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
285 pgmPoolTracDerefGCPhysHint(pPool, pPage,
286 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
287 GstPte.u & X86_PTE_PG_MASK);
288# endif
289 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
290 }
291 break;
292 }
293
294 /* page/2 sized */
295 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
296 {
297 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
298 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
299 {
300 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
301 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
302 if (uShw.pPTPae->a[iShw].n.u1Present)
303 {
304# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
305 X86PTE GstPte;
306 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
307 AssertRC(rc);
308
309 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
310 pgmPoolTracDerefGCPhysHint(pPool, pPage,
311 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
312 GstPte.u & X86_PTE_PG_MASK);
313# endif
314 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
315 }
316 }
317 break;
318 }
319
320 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
321 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
322 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
323 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
324 {
325 unsigned iGst = off / sizeof(X86PDE);
326 unsigned iShwPdpt = iGst / 256;
327 unsigned iShw = (iGst % 256) * 2;
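            /* Worked example (sketch): each 32-bit guest PDE maps 4 MB and is shadowed
               by two 2 MB PAE PDEs, and each of the four PAE shadow PDs covers 256 guest
               PDEs (1 GB). So for off = 0x644: iGst = 0x191 (401), iShwPdpt = 1 (the
               second PAE PD) and iShw = (401 % 256) * 2 = 290. */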
328 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
329
330 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
331 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
332 {
333 for (unsigned i = 0; i < 2; i++)
334 {
335# ifndef IN_RING0
336 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
337 {
338 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
339 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
340 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
341 break;
342 }
343 else
344# endif /* !IN_RING0 */
345 if (uShw.pPDPae->a[iShw+i].n.u1Present)
346 {
347 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
348 pgmPoolFree(pVM,
349 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
350 pPage->idx,
351 iShw + i);
352 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
353 }
354
355 /* paranoia / a bit assumptive. */
356 if ( pDis
357 && (off & 3)
358 && (off & 3) + cbWrite > 4)
359 {
360 const unsigned iShw2 = iShw + 2 + i;
361 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
362 {
363# ifndef IN_RING0
364 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
365 {
366 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
367 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
368 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
369 break;
370 }
371 else
372# endif /* !IN_RING0 */
373 if (uShw.pPDPae->a[iShw2].n.u1Present)
374 {
375 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
376 pgmPoolFree(pVM,
377 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
378 pPage->idx,
379 iShw2);
380 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
381 }
382 }
383 }
384 }
385 }
386 break;
387 }
388
389 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
390 {
391 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
392 const unsigned iShw = off / sizeof(X86PTEPAE);
393 if (uShw.pPTPae->a[iShw].n.u1Present)
394 {
395# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
396 X86PTEPAE GstPte;
397 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
398 AssertRC(rc);
399
400 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
401 pgmPoolTracDerefGCPhysHint(pPool, pPage,
402 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
403 GstPte.u & X86_PTE_PAE_PG_MASK);
404# endif
405 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
406 }
407
408 /* paranoia / a bit assumptive. */
409 if ( pDis
410 && (off & 7)
411 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
412 {
413 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
414 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
415
416 if (uShw.pPTPae->a[iShw2].n.u1Present)
417 {
418# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
419 X86PTEPAE GstPte;
420# ifdef IN_RING3
421 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
422# else
423 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
424# endif
425 AssertRC(rc);
426 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
427 pgmPoolTracDerefGCPhysHint(pPool, pPage,
428 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
429 GstPte.u & X86_PTE_PAE_PG_MASK);
430# endif
431 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u ,0);
432 }
433 }
434 break;
435 }
436
437 case PGMPOOLKIND_32BIT_PD:
438 {
439 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
440 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
441
442 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
443# ifndef IN_RING0
444 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
445 {
446 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
447 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
448 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
449 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
450 break;
451 }
452# endif /* !IN_RING0 */
453# ifndef IN_RING0
454 else
455# endif /* !IN_RING0 */
456 {
457 if (uShw.pPD->a[iShw].n.u1Present)
458 {
459 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
460 pgmPoolFree(pVM,
461 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
462 pPage->idx,
463 iShw);
464 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
465 }
466 }
467 /* paranoia / a bit assumptive. */
468 if ( pDis
469 && (off & 3)
470 && (off & 3) + cbWrite > sizeof(X86PTE))
471 {
472 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
473 if ( iShw2 != iShw
474 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
475 {
476# ifndef IN_RING0
477 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
478 {
479 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
480 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
481 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
482 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
483 break;
484 }
485# endif /* !IN_RING0 */
486# ifndef IN_RING0
487 else
488# endif /* !IN_RING0 */
489 {
490 if (uShw.pPD->a[iShw2].n.u1Present)
491 {
492 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
493 pgmPoolFree(pVM,
494 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
495 pPage->idx,
496 iShw2);
497 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
498 }
499 }
500 }
501 }
502#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
503 if ( uShw.pPD->a[iShw].n.u1Present
504 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
505 {
506 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
507# ifdef IN_RC /* TLB load - we're pushing things a bit... */
508 ASMProbeReadByte(pvAddress);
509# endif
510 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
511 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
512 }
513#endif
514 break;
515 }
516
517 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
518 {
519 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
520 const unsigned iShw = off / sizeof(X86PDEPAE);
521#ifndef IN_RING0
522 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
523 {
524 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
525 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
526 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
527 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
528 break;
529 }
530#endif /* !IN_RING0 */
531 /*
532 * Causes trouble when the guest uses a PDE to refer to the whole page table level
533 * structure. (Invalidate here; faults later on when it tries to change the page
534 * table entries -> recheck; probably only applies to the RC case.)
535 */
536# ifndef IN_RING0
537 else
538# endif /* !IN_RING0 */
539 {
540 if (uShw.pPDPae->a[iShw].n.u1Present)
541 {
542 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
543 pgmPoolFree(pVM,
544 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
545 pPage->idx,
546 iShw);
547 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
548 }
549 }
550 /* paranoia / a bit assumptive. */
551 if ( pDis
552 && (off & 7)
553 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
554 {
555 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
556 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
557
558#ifndef IN_RING0
559 if ( iShw2 != iShw
560 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
561 {
562 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
563 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
564 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
565 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
566 break;
567 }
568#endif /* !IN_RING0 */
569# ifndef IN_RING0
570 else
571# endif /* !IN_RING0 */
572 if (uShw.pPDPae->a[iShw2].n.u1Present)
573 {
574 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
575 pgmPoolFree(pVM,
576 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
577 pPage->idx,
578 iShw2);
579 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
580 }
581 }
582 break;
583 }
584
585 case PGMPOOLKIND_PAE_PDPT:
586 {
587 /*
588 * Hopefully this doesn't happen very often:
589 * - touching unused parts of the page
590 * - messing with the bits of pd pointers without changing the physical address
591 */
592 /* PDPT roots are not page aligned; 32 byte only! */
593 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
594
595 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
596 const unsigned iShw = offPdpt / sizeof(X86PDPE);
597 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
598 {
599# ifndef IN_RING0
600 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
601 {
602 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
603 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
604 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
605 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
606 break;
607 }
608# endif /* !IN_RING0 */
609# ifndef IN_RING0
610 else
611# endif /* !IN_RING0 */
612 if (uShw.pPDPT->a[iShw].n.u1Present)
613 {
614 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
615 pgmPoolFree(pVM,
616 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
617 pPage->idx,
618 iShw);
619 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
620 }
621
622 /* paranoia / a bit assumptive. */
623 if ( pDis
624 && (offPdpt & 7)
625 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
626 {
627 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
628 if ( iShw2 != iShw
629 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
630 {
631# ifndef IN_RING0
632 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
633 {
634 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
635 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
636 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
637 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
638 break;
639 }
640# endif /* !IN_RING0 */
641# ifndef IN_RING0
642 else
643# endif /* !IN_RING0 */
644 if (uShw.pPDPT->a[iShw2].n.u1Present)
645 {
646 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
647 pgmPoolFree(pVM,
648 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
649 pPage->idx,
650 iShw2);
651 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
652 }
653 }
654 }
655 }
656 break;
657 }
658
659#ifndef IN_RC
660 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
661 {
662 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
663 const unsigned iShw = off / sizeof(X86PDEPAE);
664 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
665 if (uShw.pPDPae->a[iShw].n.u1Present)
666 {
667 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
668 pgmPoolFree(pVM,
669 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
670 pPage->idx,
671 iShw);
672 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
673 }
674 /* paranoia / a bit assumptive. */
675 if ( pDis
676 && (off & 7)
677 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
678 {
679 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
680 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
681
682 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
683 if (uShw.pPDPae->a[iShw2].n.u1Present)
684 {
685 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
686 pgmPoolFree(pVM,
687 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
688 pPage->idx,
689 iShw2);
690 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
691 }
692 }
693 break;
694 }
695
696 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
697 {
698 /*
699 * Hopefully this doesn't happen very often:
700 * - messing with the bits of pd pointers without changing the physical address
701 */
702 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
703 {
704 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
705 const unsigned iShw = off / sizeof(X86PDPE);
706 if (uShw.pPDPT->a[iShw].n.u1Present)
707 {
708 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
709 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
710 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
711 }
712 /* paranoia / a bit assumptive. */
713 if ( pDis
714 && (off & 7)
715 && (off & 7) + cbWrite > sizeof(X86PDPE))
716 {
717 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
718 if (uShw.pPDPT->a[iShw2].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
721 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
722 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
723 }
724 }
725 }
726 break;
727 }
728
729 case PGMPOOLKIND_64BIT_PML4:
730 {
731 /*
732 * Hopefully this doesn't happen very often:
733 * - messing with the bits of pd pointers without changing the physical address
734 */
735 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
736 {
737 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
738 const unsigned iShw = off / sizeof(X86PDPE);
739 if (uShw.pPML4->a[iShw].n.u1Present)
740 {
741 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
742 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
743 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
744 }
745 /* paranoia / a bit assumptive. */
746 if ( pDis
747 && (off & 7)
748 && (off & 7) + cbWrite > sizeof(X86PDPE))
749 {
750 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
751 if (uShw.pPML4->a[iShw2].n.u1Present)
752 {
753 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
754 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
755 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
756 }
757 }
758 }
759 break;
760 }
761#endif /* !IN_RC */
762
763 default:
764 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
765 }
766 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
767
768 /* next */
769 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
770 return;
771 pPage = &pPool->aPages[pPage->iMonitoredNext];
772 }
773}
774
775# ifndef IN_RING3
776/**
777 * Checks if an access could be a fork operation in progress.
778 *
779 * Meaning that the guest is setting up the parent process for copy-on-write.
780 *
781 * @returns true if it's likely that we're forking, otherwise false.
782 * @param pPool The pool.
783 * @param pDis The disassembled instruction.
784 * @param offFault The access offset.
785 */
786DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
787{
788 /*
789 * i386 linux is using btr to clear X86_PTE_RW.
790 * The functions involved are (2.6.16 source inspection):
791 * clear_bit
792 * ptep_set_wrprotect
793 * copy_one_pte
794 * copy_pte_range
795 * copy_pmd_range
796 * copy_pud_range
797 * copy_page_range
798 * dup_mmap
799 * dup_mm
800 * copy_mm
801 * copy_process
802 * do_fork
803 */
804 if ( pDis->pCurInstr->opcode == OP_BTR
805 && !(offFault & 4)
806 /** @todo Validate that the bit index is X86_PTE_RW. */
807 )
808 {
809 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
810 return true;
811 }
812 return false;
813}
814
815
816/**
817 * Determine whether the page is likely to have been reused.
818 *
819 * @returns true if we consider the page as being reused for a different purpose.
820 * @returns false if we consider it to still be a paging page.
821 * @param pVM VM Handle.
822 * @param pVCpu VMCPU Handle.
823 * @param pRegFrame Trap register frame.
824 * @param pDis The disassembly info for the faulting instruction.
825 * @param pvFault The fault address.
826 *
827 * @remark The REP prefix check is left to the caller because of STOSD/W.
828 */
829DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
830{
831#ifndef IN_RC
832 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
833 if ( HWACCMHasPendingIrq(pVM)
834 && (pRegFrame->rsp - pvFault) < 32)
835 {
836 /* Fault caused by stack writes while trying to inject an interrupt event. */
837 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
838 return true;
839 }
840#else
841 NOREF(pVM); NOREF(pvFault);
842#endif
843
844 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
845
846 /* Non-supervisor mode write means it's used for something else. */
847 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
848 return true;
849
850 switch (pDis->pCurInstr->opcode)
851 {
852 /* call implies the actual push of the return address faulted */
853 case OP_CALL:
854 Log4(("pgmPoolMonitorIsReused: CALL\n"));
855 return true;
856 case OP_PUSH:
857 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
858 return true;
859 case OP_PUSHF:
860 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
861 return true;
862 case OP_PUSHA:
863 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
864 return true;
865 case OP_FXSAVE:
866 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
867 return true;
868 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
869 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
870 return true;
871 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
872 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
873 return true;
874 case OP_MOVSWD:
875 case OP_STOSWD:
876 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
877 && pRegFrame->rcx >= 0x40
878 )
879 {
880 Assert(pDis->mode == CPUMODE_64BIT);
881
882 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
883 return true;
884 }
885 return false;
886 }
887 if ( (pDis->param1.flags & USE_REG_GEN32)
888 && (pDis->param1.base.reg_gen == USE_REG_ESP))
889 {
890 Log4(("pgmPoolMonitorIsReused: ESP\n"));
891 return true;
892 }
893
894 return false;
895}
896
897
898/**
899 * Flushes the page being accessed.
900 *
901 * @returns VBox status code suitable for scheduling.
902 * @param pVM The VM handle.
903 * @param pVCpu The VMCPU handle.
904 * @param pPool The pool.
905 * @param pPage The pool page (head).
906 * @param pDis The disassembly of the write instruction.
907 * @param pRegFrame The trap register frame.
908 * @param GCPhysFault The fault address as guest physical address.
909 * @param pvFault The fault address.
910 */
911static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
912 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
913{
914 /*
915 * First, do the flushing.
916 */
917 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
918
919 /*
920 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
921 */
922 uint32_t cbWritten;
923 int rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten);
924 if (RT_SUCCESS(rc2))
925 pRegFrame->rip += pDis->opsize;
926 else if (rc2 == VERR_EM_INTERPRETER)
927 {
928#ifdef IN_RC
929 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
930 {
931 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
932 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
933 rc = VINF_SUCCESS;
934 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
935 }
936 else
937#endif
938 {
939 rc = VINF_EM_RAW_EMULATE_INSTR;
940 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
941 }
942 }
943 else
944 rc = rc2;
945
946 /* See use in pgmPoolAccessHandlerSimple(). */
947 PGM_INVL_VCPU_TLBS(pVCpu);
948
949 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
950 return rc;
951
952}
953
954
955/**
956 * Handles the STOSD write accesses.
957 *
958 * @returns VBox status code suitable for scheduling.
959 * @param pVM The VM handle.
960 * @param pPool The pool.
961 * @param pPage The pool page (head).
962 * @param pDis The disassembly of the write instruction.
963 * @param pRegFrame The trap register frame.
964 * @param GCPhysFault The fault address as guest physical address.
965 * @param pvFault The fault address.
966 */
967DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
968 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
969{
970 unsigned uIncrement = pDis->param1.size;
971
972 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
973 Assert(pRegFrame->rcx <= 0x20);
974
975#ifdef VBOX_STRICT
976 if (pDis->opmode == CPUMODE_32BIT)
977 Assert(uIncrement == 4);
978 else
979 Assert(uIncrement == 8);
980#endif
981
982 Log3(("pgmPoolAccessHandlerSTOSD\n"));
983
984 /*
985 * Increment the modification counter and insert it into the list
986 * of modified pages the first time.
987 */
988 if (!pPage->cModifications++)
989 pgmPoolMonitorModifiedInsert(pPool, pPage);
990
991 /*
992 * Execute REP STOSD.
993 *
994 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
995 * write situation, meaning that it's safe to write here.
996 */
997 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
998 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
999 while (pRegFrame->rcx)
1000 {
1001#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1002 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1003 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1004 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1005#else
1006 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1007#endif
1008#ifdef IN_RC
1009 *(uint32_t *)pu32 = pRegFrame->eax;
1010#else
1011 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
1012#endif
1013 pu32 += uIncrement;
1014 GCPhysFault += uIncrement;
1015 pRegFrame->rdi += uIncrement;
1016 pRegFrame->rcx--;
1017 }
1018 pRegFrame->rip += pDis->opsize;
1019
1020#ifdef IN_RC
1021 /* See use in pgmPoolAccessHandlerSimple(). */
1022 PGM_INVL_VCPU_TLBS(pVCpu);
1023#endif
1024
1025 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1026 return VINF_SUCCESS;
1027}
1028
1029
1030/**
1031 * Handles the simple write accesses.
1032 *
1033 * @returns VBox status code suitable for scheduling.
1034 * @param pVM The VM handle.
1035 * @param pVCpu The VMCPU handle.
1036 * @param pPool The pool.
1037 * @param pPage The pool page (head).
1038 * @param pDis The disassembly of the write instruction.
1039 * @param pRegFrame The trap register frame.
1040 * @param GCPhysFault The fault address as guest physical address.
1041 * @param pvFault The fault address.
1042 */
1043DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1044 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
1045{
1046 Log3(("pgmPoolAccessHandlerSimple\n"));
1047 /*
1048 * Increment the modification counter and insert it into the list
1049 * of modified pages the first time.
1050 */
1051 if (!pPage->cModifications++)
1052 pgmPoolMonitorModifiedInsert(pPool, pPage);
1053
1054 /*
1055 * Clear all the pages. ASSUMES that pvFault is readable.
1056 */
1057#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1058 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1059 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1060 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1061#else
1062 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1063#endif
1064
1065 /*
1066 * Interpret the instruction.
1067 */
1068 uint32_t cb;
1069 int rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb);
1070 if (RT_SUCCESS(rc))
1071 pRegFrame->rip += pDis->opsize;
1072 else if (rc == VERR_EM_INTERPRETER)
1073 {
1074 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1075 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1076 rc = VINF_EM_RAW_EMULATE_INSTR;
1077 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1078 }
1079
1080#ifdef IN_RC
1081 /*
1082 * Quick hack, with logging enabled we're getting stale
1083 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1084 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1085 * have to be fixed to support this. But that'll have to wait till next week.
1086 *
1087 * An alternative is to keep track of the changed PTEs together with the
1088 * GCPhys from the guest PT. This may prove expensive though.
1089 *
1090 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1091 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1092 */
1093 PGM_INVL_VCPU_TLBS(pVCpu);
1094#endif
1095
1096 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1097 return rc;
1098}
1099
1100/**
1101 * \#PF Handler callback for PT write accesses.
1102 *
1103 * @returns VBox status code (appropriate for GC return).
1104 * @param pVM VM Handle.
1105 * @param uErrorCode CPU Error code.
1106 * @param pRegFrame Trap register frame.
1107 * NULL on DMA and other non CPU access.
1108 * @param pvFault The fault address (cr2).
1109 * @param GCPhysFault The GC physical address corresponding to pvFault.
1110 * @param pvUser User argument.
1111 */
1112DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1113{
1114 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1115 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1116 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1117 PVMCPU pVCpu = VMMGetCpu(pVM);
1118 unsigned cMaxModifications;
1119
1120 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1121
1122 pgmLock(pVM);
1123 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1124 {
1125 /* Pool page changed while we were waiting for the lock; ignore. */
1126 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1127 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1128 pgmUnlock(pVM);
1129 return VINF_SUCCESS;
1130 }
1131
1132 /*
1133 * Disassemble the faulting instruction.
1134 */
1135 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1136 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1137 AssertReturnStmt(rc == VINF_SUCCESS, pgmUnlock(pVM), rc);
1138
1139 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1140
1141 /*
1142 * We should ALWAYS have the list head as user parameter. This
1143 * is because we use that page to record the changes.
1144 */
1145 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1146
1147 /* Maximum nr of modifications depends on the guest mode. */
1148 if (pDis->mode == CPUMODE_32BIT)
1149 cMaxModifications = 32;
1150 else
1151 cMaxModifications = 16;
1152
1153 /*
1154 * Incremental page table updates should weigh more than random ones.
1155 * (Only applies when started from offset 0)
1156 */
1157 pVCpu->pgm.s.cPoolAccessHandler++;
1158 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1159 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1160 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1161 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1162 {
1163 Log(("Possible page reuse cMods=%d -> %d\n", pPage->cModifications, (pPage->cModifications + 2) * 2));
1164 pPage->cModifications = (pPage->cModifications + 1) * 2;
1165 pPage->pvLastAccessHandlerFault = pvFault;
1166 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1167 if (pPage->cModifications > cMaxModifications)
1168 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1169 }
1170
1171 /*
1172 * Check if it's worth dealing with.
1173 */
1174 bool fReused = false;
1175 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1176 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1177 )
1178 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1179 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1180 {
1181 /*
1182 * Simple instructions, no REP prefix.
1183 */
1184 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1185 {
1186 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1187
1188 /* A mov instruction to change the first page table entry will be remembered so we can detect
1189 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1190 */
1191 if ( rc == VINF_SUCCESS
1192 && pDis->pCurInstr->opcode == OP_MOV
1193 && (pvFault & PAGE_OFFSET_MASK) == 0)
1194 {
1195 pPage->pvLastAccessHandlerFault = pvFault;
1196 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1197 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1198 }
1199 else
1200 if (pPage->pvLastAccessHandlerFault == pvFault)
1201 {
1202 /* ignore the 2nd write to this page table entry. */
1203 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1204 }
1205 else
1206 {
1207 pPage->pvLastAccessHandlerFault = 0;
1208 pPage->pvLastAccessHandlerRip = 0;
1209 }
1210
1211 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1212 pgmUnlock(pVM);
1213 return rc;
1214 }
1215
1216 /*
1217 * Windows is frequently doing small memset() operations (netio test 4k+).
1218 * We have to deal with these or we'll kill the cache and performance.
1219 */
1220 if ( pDis->pCurInstr->opcode == OP_STOSWD
1221 && !pRegFrame->eflags.Bits.u1DF
1222 && pDis->opmode == pDis->mode
1223 && pDis->addrmode == pDis->mode)
1224 {
1225 bool fValidStosd = false;
1226
1227 if ( pDis->mode == CPUMODE_32BIT
1228 && pDis->prefix == PREFIX_REP
1229 && pRegFrame->ecx <= 0x20
1230 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1231 && !((uintptr_t)pvFault & 3)
1232 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1233 )
1234 {
1235 fValidStosd = true;
1236 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1237 }
1238 else
1239 if ( pDis->mode == CPUMODE_64BIT
1240 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1241 && pRegFrame->rcx <= 0x20
1242 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1243 && !((uintptr_t)pvFault & 7)
1244 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1245 )
1246 {
1247 fValidStosd = true;
1248 }
1249
1250 if (fValidStosd)
1251 {
1252 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1253 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1254 pgmUnlock(pVM);
1255 return rc;
1256 }
1257 }
1258
1259 /* REP prefix, don't bother. */
1260 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1261 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1262 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1263 }
1264
1265 /*
1266 * Not worth it, so flush it.
1267 *
1268 * If we considered it to be reused, don't go back to ring-3
1269 * to emulate failed instructions since we usually cannot
1270 * interpret them. This may be a bit risky, in which case
1271 * the reuse detection must be fixed.
1272 */
1273 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1274 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1275 rc = VINF_SUCCESS;
1276 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1277 pgmUnlock(pVM);
1278 return rc;
1279}
1280
1281# endif /* !IN_RING3 */
1282#endif /* PGMPOOL_WITH_MONITORING */
1283
1284#ifdef PGMPOOL_WITH_CACHE
1285
1286/**
1287 * Inserts a page into the GCPhys hash table.
1288 *
1289 * @param pPool The pool.
1290 * @param pPage The page.
1291 */
1292DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1293{
1294 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1295 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1296 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1297 pPage->iNext = pPool->aiHash[iHash];
1298 pPool->aiHash[iHash] = pPage->idx;
1299}
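/*
 * Note (descriptive, added for clarity): the GCPhys hash is intrusive. pPool->aiHash[]
 * holds the index of the first page in each bucket and PGMPOOLPAGE::iNext chains the
 * rest, terminated by NIL_PGMPOOL_IDX; the insert above simply pushes the page onto
 * the head of its bucket.
 */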
1300
1301
1302/**
1303 * Removes a page from the GCPhys hash table.
1304 *
1305 * @param pPool The pool.
1306 * @param pPage The page.
1307 */
1308DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1309{
1310 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1311 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1312 if (pPool->aiHash[iHash] == pPage->idx)
1313 pPool->aiHash[iHash] = pPage->iNext;
1314 else
1315 {
1316 uint16_t iPrev = pPool->aiHash[iHash];
1317 for (;;)
1318 {
1319 const int16_t i = pPool->aPages[iPrev].iNext;
1320 if (i == pPage->idx)
1321 {
1322 pPool->aPages[iPrev].iNext = pPage->iNext;
1323 break;
1324 }
1325 if (i == NIL_PGMPOOL_IDX)
1326 {
1327 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1328 break;
1329 }
1330 iPrev = i;
1331 }
1332 }
1333 pPage->iNext = NIL_PGMPOOL_IDX;
1334}
1335
1336
1337/**
1338 * Frees up one cache page.
1339 *
1340 * @returns VBox status code.
1341 * @retval VINF_SUCCESS on success.
1342 * @param pPool The pool.
1343 * @param iUser The user index.
1344 */
1345static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1346{
1347#ifndef IN_RC
1348 const PVM pVM = pPool->CTX_SUFF(pVM);
1349#endif
1350 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1351 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1352
1353 /*
1354 * Select one page from the tail of the age list.
1355 */
1356 PPGMPOOLPAGE pPage;
1357 for (unsigned iLoop = 0; ; iLoop++)
1358 {
1359 uint16_t iToFree = pPool->iAgeTail;
1360 if (iToFree == iUser)
1361 iToFree = pPool->aPages[iToFree].iAgePrev;
1362/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1363 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1364 {
1365 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1366 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1367 {
1368 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1369 continue;
1370 iToFree = i;
1371 break;
1372 }
1373 }
1374*/
1375 Assert(iToFree != iUser);
1376 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1377 pPage = &pPool->aPages[iToFree];
1378
1379 /*
1380 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1381 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1382 */
1383 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1384 break;
1385 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1386 pgmPoolCacheUsed(pPool, pPage);
1387 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1388 }
1389
1390 /*
1391 * Found a usable page, flush it and return.
1392 */
1393 int rc = pgmPoolFlushPage(pPool, pPage);
1394 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1395 if (rc == VINF_SUCCESS)
1396 PGM_INVL_ALL_VCPU_TLBS(pVM);
1397 return rc;
1398}
1399
1400
1401/**
1402 * Checks if a kind mismatch is really a page being reused
1403 * or just a normal remapping.
1404 *
1405 * @returns true if reused and the cached page (enmKind1) should be flushed
1406 * @returns false if not reused.
1407 * @param enmKind1 The kind of the cached page.
1408 * @param enmKind2 The kind of the requested page.
1409 */
1410static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1411{
1412 switch (enmKind1)
1413 {
1414 /*
1415 * Never reuse them. There is no remapping in non-paging mode.
1416 */
1417 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1418 case PGMPOOLKIND_32BIT_PD_PHYS:
1419 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1420 case PGMPOOLKIND_PAE_PD_PHYS:
1421 case PGMPOOLKIND_PAE_PDPT_PHYS:
1422 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1423 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1424 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1425 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1426 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1427 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1428 return false;
1429
1430 /*
1431 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1432 */
1433 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1434 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1435 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1436 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1437 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1438 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1439 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1440 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1441 case PGMPOOLKIND_32BIT_PD:
1442 case PGMPOOLKIND_PAE_PDPT:
1443 switch (enmKind2)
1444 {
1445 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1446 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1447 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1448 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1449 case PGMPOOLKIND_64BIT_PML4:
1450 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1451 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1452 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1453 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1454 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1455 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1456 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1457 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1458 return true;
1459 default:
1460 return false;
1461 }
1462
1463 /*
1464 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1465 */
1466 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1467 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1468 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1469 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1470 case PGMPOOLKIND_64BIT_PML4:
1471 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1472 switch (enmKind2)
1473 {
1474 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1475 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1476 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1477 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1478 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1479 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1480 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1481 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1482 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1483 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1484 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1485 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1486 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1487 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1488 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1489 return true;
1490 default:
1491 return false;
1492 }
1493
1494 /*
1495 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1496 */
1497 case PGMPOOLKIND_ROOT_NESTED:
1498 return false;
1499
1500 default:
1501 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1502 }
1503}
1504
1505
1506/**
1507 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1508 *
1509 * @returns VBox status code.
1510 * @retval VINF_PGM_CACHED_PAGE on success.
1511 * @retval VERR_FILE_NOT_FOUND if not found.
1512 * @param pPool The pool.
1513 * @param GCPhys The GC physical address of the page we're going to shadow.
1514 * @param enmKind The kind of mapping.
1515 * @param enmAccess Access type for the mapping (only relevant for big pages)
1516 * @param iUser The shadow page pool index of the user table.
1517 * @param iUserTable The index into the user table (shadowed).
1518 * @param ppPage Where to store the pointer to the page.
1519 */
1520static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1521{
1522#ifndef IN_RC
1523 const PVM pVM = pPool->CTX_SUFF(pVM);
1524#endif
1525 /*
1526 * Look up the GCPhys in the hash.
1527 */
1528 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1529 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
1530 if (i != NIL_PGMPOOL_IDX)
1531 {
1532 do
1533 {
1534 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1535 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1536 if (pPage->GCPhys == GCPhys)
1537 {
1538 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
1539 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
1540 {
1541 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
1542 * doesn't flush it in case there are no more free use records.
1543 */
1544 pgmPoolCacheUsed(pPool, pPage);
1545
1546 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1547 if (RT_SUCCESS(rc))
1548 {
1549 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
1550 *ppPage = pPage;
1551 if (pPage->cModifications)
1552 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
1553 STAM_COUNTER_INC(&pPool->StatCacheHits);
1554 return VINF_PGM_CACHED_PAGE;
1555 }
1556 return rc;
1557 }
1558
1559 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
1560 {
1561 /*
1562 * The kind is different. In some cases we should now flush the page
1563 * as it has been reused, but in most cases this is normal remapping
1564 * of PDs as PT or big pages using the GCPhys field in a slightly
1565 * different way than the other kinds.
1566 */
1567 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1568 {
1569 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1570 pgmPoolFlushPage(pPool, pPage);
1571 PGM_INVL_VCPU_TLBS(VMMGetCpu(pVM)); /* see PT handler. */
1572 break;
1573 }
1574 }
1575 }
1576
1577 /* next */
1578 i = pPage->iNext;
1579 } while (i != NIL_PGMPOOL_IDX);
1580 }
1581
1582 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
1583 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1584 return VERR_FILE_NOT_FOUND;
1585}
1586
1587
1588/**
1589 * Inserts a page into the cache.
1590 *
1591 * @param pPool The pool.
1592 * @param pPage The cached page.
1593 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1594 */
1595static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1596{
1597 /*
1598 * Insert into the GCPhys hash if the page is fit for that.
1599 */
1600 Assert(!pPage->fCached);
1601 if (fCanBeCached)
1602 {
1603 pPage->fCached = true;
1604 pgmPoolHashInsert(pPool, pPage);
1605 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1606 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1607 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1608 }
1609 else
1610 {
1611 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
1612 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
1613 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1614 }
1615
1616 /*
1617 * Insert at the head of the age list.
1618 */
1619 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1620 pPage->iAgeNext = pPool->iAgeHead;
1621 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1622 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1623 else
1624 pPool->iAgeTail = pPage->idx;
1625 pPool->iAgeHead = pPage->idx;
1626}
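/*
 * Note (descriptive, added for clarity): the age list is a doubly-linked LRU list
 * threaded through PGMPOOLPAGE::iAgeNext/iAgePrev. pgmPoolCacheInsert() and
 * pgmPoolCacheUsed() keep the most recently used page at iAgeHead, while
 * pgmPoolCacheFreeOne() evicts from iAgeTail.
 */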
1627
1628
1629/**
1630 * Flushes a cached page.
1631 *
1632 * @param pPool The pool.
1633 * @param pPage The cached page.
1634 */
1635static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1636{
1637 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1638
1639 /*
1640 * Remove the page from the hash.
1641 */
1642 if (pPage->fCached)
1643 {
1644 pPage->fCached = false;
1645 pgmPoolHashRemove(pPool, pPage);
1646 }
1647 else
1648 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1649
1650 /*
1651 * Remove it from the age list.
1652 */
1653 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1654 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1655 else
1656 pPool->iAgeTail = pPage->iAgePrev;
1657 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1658 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1659 else
1660 pPool->iAgeHead = pPage->iAgeNext;
1661 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1662 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1663}
1664
1665#endif /* PGMPOOL_WITH_CACHE */
1666#ifdef PGMPOOL_WITH_MONITORING
1667
1668/**
1669 * Looks for pages sharing the monitor.
1670 *
1671 * @returns Pointer to the head page.
1672 * @returns NULL if not found.
1673 * @param pPool The Pool
1674 * @param pNewPage The page which is going to be monitored.
1675 */
1676static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1677{
1678#ifdef PGMPOOL_WITH_CACHE
1679 /*
1680 * Look up the GCPhys in the hash.
1681 */
1682 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1683 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1684 if (i == NIL_PGMPOOL_IDX)
1685 return NULL;
1686 do
1687 {
1688 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1689 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1690 && pPage != pNewPage)
1691 {
1692 switch (pPage->enmKind)
1693 {
1694 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1695 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1696 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1697 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1698 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1699 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1700 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1701 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1702 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1703 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1704 case PGMPOOLKIND_64BIT_PML4:
1705 case PGMPOOLKIND_32BIT_PD:
1706 case PGMPOOLKIND_PAE_PDPT:
1707 {
1708 /* find the head */
1709 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1710 {
1711 Assert(pPage->iMonitoredPrev != pPage->idx);
1712 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1713 }
1714 return pPage;
1715 }
1716
1717 /* ignore, no monitoring. */
1718 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1719 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1720 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1721 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1722 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1723 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1724 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1725 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1726 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1727 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1728 case PGMPOOLKIND_ROOT_NESTED:
1729 case PGMPOOLKIND_PAE_PD_PHYS:
1730 case PGMPOOLKIND_PAE_PDPT_PHYS:
1731 case PGMPOOLKIND_32BIT_PD_PHYS:
1732 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1733 break;
1734 default:
1735 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1736 }
1737 }
1738
1739 /* next */
1740 i = pPage->iNext;
1741 } while (i != NIL_PGMPOOL_IDX);
1742#endif
1743 return NULL;
1744}
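
/*
 * Illustration only (not part of the pool code): pages shadowing the same
 * guest page share one physical access handler and are chained through the
 * iMonitoredNext/iMonitoredPrev indices. A minimal sketch of visiting every
 * page in such a chain, starting from a head page as returned by
 * pgmPoolMonitorGetPageByGCPhys above.
 */
#if 0
static void pgmPoolMonitorLogChainExample(PPGMPOOL pPool, PPGMPOOLPAGE pPageHead)
{
    for (PPGMPOOLPAGE pCur = pPageHead;;)
    {
        Log(("monitored chain: idx=%d enmKind=%d GCPhys=%RGp\n", pCur->idx, pCur->enmKind, pCur->GCPhys));
        if (pCur->iMonitoredNext == NIL_PGMPOOL_IDX)
            break;
        pCur = &pPool->aPages[pCur->iMonitoredNext];
    }
}
#endif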
1745
1746
1747/**
1748 * Enables write monitoring of a guest page.
1749 *
1750 * @returns VBox status code.
1751 * @retval VINF_SUCCESS on success.
1752 * @param pPool The pool.
1753 * @param pPage The cached page.
1754 */
1755static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1756{
1757 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1758
1759 /*
1760 * Filter out the relevant kinds.
1761 */
1762 switch (pPage->enmKind)
1763 {
1764 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1765 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1766 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1767 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1768 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1769 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1770 case PGMPOOLKIND_64BIT_PML4:
1771 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1772 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1773 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1774 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1775 case PGMPOOLKIND_32BIT_PD:
1776 case PGMPOOLKIND_PAE_PDPT:
1777 break;
1778
1779 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1780 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1781 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1782 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1783 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1784 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1785 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1786 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1787 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1788 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1789 case PGMPOOLKIND_ROOT_NESTED:
1790 /* Nothing to monitor here. */
1791 return VINF_SUCCESS;
1792
1793 case PGMPOOLKIND_32BIT_PD_PHYS:
1794 case PGMPOOLKIND_PAE_PDPT_PHYS:
1795 case PGMPOOLKIND_PAE_PD_PHYS:
1796 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
1797 /* Nothing to monitor here. */
1798 return VINF_SUCCESS;
1799#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1800 break;
1801#else
1802 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1803#endif
1804 default:
1805 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1806 }
1807
1808 /*
1809 * Install handler.
1810 */
1811 int rc;
1812 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1813 if (pPageHead)
1814 {
1815 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1816 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1817 pPage->iMonitoredPrev = pPageHead->idx;
1818 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1819 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1820 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1821 pPageHead->iMonitoredNext = pPage->idx;
1822 rc = VINF_SUCCESS;
1823 }
1824 else
1825 {
1826 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1827 PVM pVM = pPool->CTX_SUFF(pVM);
1828 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1829 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1830 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1831 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1832 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1833 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1834 pPool->pszAccessHandler);
1835 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1836 * the heap size should suffice. */
1837 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
1838 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
1839 }
1840 pPage->fMonitored = true;
1841 return rc;
1842}
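
/*
 * Illustration only: the handler registered by pgmPoolMonitorInsert above
 * covers the whole guest page being shadowed, so the sub-page bits of
 * pPage->GCPhys are masked off first. A small worked example, assuming the
 * usual 4 KB PAGE_SIZE and a hypothetical pPage->GCPhys of 0x00123456:
 *      GCPhysPage = 0x00123456 & ~(RTGCPHYS)(PAGE_SIZE - 1) = 0x00123000   (first byte covered)
 *      last byte  = GCPhysPage + (PAGE_SIZE - 1)            = 0x00123fff
 */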
1843
1844
1845/**
1846 * Disables write monitoring of a guest page.
1847 *
1848 * @returns VBox status code.
1849 * @retval VINF_SUCCESS on success.
1850 * @param pPool The pool.
1851 * @param pPage The cached page.
1852 */
1853static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1854{
1855 /*
1856 * Filter out the relevant kinds.
1857 */
1858 switch (pPage->enmKind)
1859 {
1860 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1861 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1862 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1863 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1864 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1865 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1866 case PGMPOOLKIND_64BIT_PML4:
1867 case PGMPOOLKIND_32BIT_PD:
1868 case PGMPOOLKIND_PAE_PDPT:
1869 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1870 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1871 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1872 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1873 break;
1874
1875 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1876 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1877 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1878 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1879 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1880 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1881 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1882 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1883 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1884 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1885 case PGMPOOLKIND_ROOT_NESTED:
1886 case PGMPOOLKIND_PAE_PD_PHYS:
1887 case PGMPOOLKIND_PAE_PDPT_PHYS:
1888 case PGMPOOLKIND_32BIT_PD_PHYS:
1889 /* Nothing to monitor here. */
1890 return VINF_SUCCESS;
1891
1892#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1893 break;
1894#endif
1895 default:
1896 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1897 }
1898
1899 /*
1900 * Remove the page from the monitored list or uninstall it if last.
1901 */
1902 const PVM pVM = pPool->CTX_SUFF(pVM);
1903 int rc;
1904 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1905 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1906 {
1907 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1908 {
1909 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1910 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1911 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1912 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1913 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1914 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1915 pPool->pszAccessHandler);
1916 AssertFatalRCSuccess(rc);
1917 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1918 }
1919 else
1920 {
1921 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1922 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1923 {
1924 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1925 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1926 }
1927 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1928 rc = VINF_SUCCESS;
1929 }
1930 }
1931 else
1932 {
1933 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1934 AssertFatalRC(rc);
1935#ifdef VBOX_STRICT
1936 PVMCPU pVCpu = VMMGetCpu(pVM);
1937#endif
1938 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
1939 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
1940 }
1941 pPage->fMonitored = false;
1942
1943 /*
1944 * Remove it from the list of modified pages (if in it).
1945 */
1946 pgmPoolMonitorModifiedRemove(pPool, pPage);
1947
1948 return rc;
1949}
1950
1951
1952/**
1953 * Inserts the page into the list of modified pages.
1954 *
1955 * @param pPool The pool.
1956 * @param pPage The page.
1957 */
1958void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1959{
1960 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1961 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1962 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1963 && pPool->iModifiedHead != pPage->idx,
1964 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1965 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1966 pPool->iModifiedHead, pPool->cModifiedPages));
1967
1968 pPage->iModifiedNext = pPool->iModifiedHead;
1969 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1970 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1971 pPool->iModifiedHead = pPage->idx;
1972 pPool->cModifiedPages++;
1973#ifdef VBOX_WITH_STATISTICS
1974 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1975 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1976#endif
1977}
1978
1979
1980/**
1981 * Removes the page from the list of modified pages and resets the
1982 * modification counter.
1983 *
1984 * @param pPool The pool.
1985 * @param pPage The page which is believed to be in the list of modified pages.
1986 */
1987static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1988{
1989 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1990 if (pPool->iModifiedHead == pPage->idx)
1991 {
1992 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1993 pPool->iModifiedHead = pPage->iModifiedNext;
1994 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1995 {
1996 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1997 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1998 }
1999 pPool->cModifiedPages--;
2000 }
2001 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2002 {
2003 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2004 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2005 {
2006 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2007 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2008 }
2009 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2010 pPool->cModifiedPages--;
2011 }
2012 else
2013 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2014 pPage->cModifications = 0;
2015}
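
/*
 * Illustration only: a page is linked into the modified list when it is the
 * list head or has a valid prev/next index. A minimal sketch of that
 * membership test, mirroring the assertions used by the two functions above
 * (pPool and pPage are assumed to be in scope):
 */
#if 0
bool const fOnModifiedList =    pPool->iModifiedHead == pPage->idx
                             || pPage->iModifiedPrev != NIL_PGMPOOL_IDX
                             || pPage->iModifiedNext != NIL_PGMPOOL_IDX;
#endif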
2016
2017
2018/**
2019 * Zaps the list of modified pages, resetting their modification counters in the process.
2020 *
2021 * @param pVM The VM handle.
2022 */
2023void pgmPoolMonitorModifiedClearAll(PVM pVM)
2024{
2025 pgmLock(pVM);
2026 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2027 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2028
2029 unsigned cPages = 0; NOREF(cPages);
2030 uint16_t idx = pPool->iModifiedHead;
2031 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2032 while (idx != NIL_PGMPOOL_IDX)
2033 {
2034 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2035 idx = pPage->iModifiedNext;
2036 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2037 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2038 pPage->cModifications = 0;
2039 Assert(++cPages);
2040 }
2041 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2042 pPool->cModifiedPages = 0;
2043 pgmUnlock(pVM);
2044}
2045
2046
2047#ifdef IN_RING3
2048/**
2049 * Callback to clear all shadow pages and clear all modification counters.
2050 *
2051 * @returns VBox status code.
2052 * @param pVM The VM handle.
2053 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
2054 * @param pvUser Unused parameter.
2055 *
2056 * @remark Should only be used when monitoring is available, thus placed in
2057 * the PGMPOOL_WITH_MONITORING \#ifdef.
2058 */
2059DECLCALLBACK(int) pgmPoolClearAll(PVM pVM, PVMCPU pVCpu, void *pvUser)
2060{
2061 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2062 STAM_PROFILE_START(&pPool->StatClearAll, c);
2063 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2064 NOREF(pvUser); NOREF(pVCpu);
2065
2066 pgmLock(pVM);
2067
2068 /*
2069 * Iterate all the pages until we've encountered all that are in use.
2070 * This is a simple but not quite optimal solution.
2071 */
2072 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2073 unsigned cLeft = pPool->cUsedPages;
2074 unsigned iPage = pPool->cCurPages;
2075 while (--iPage >= PGMPOOL_IDX_FIRST)
2076 {
2077 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2078 if (pPage->GCPhys != NIL_RTGCPHYS)
2079 {
2080 switch (pPage->enmKind)
2081 {
2082 /*
2083 * We only care about shadow page tables.
2084 */
2085 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2086 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2087 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2088 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2089 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2090 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2091 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2092 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2093 {
2094#ifdef PGMPOOL_WITH_USER_TRACKING
2095 if (pPage->cPresent)
2096#endif
2097 {
2098 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2099 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2100 ASMMemZeroPage(pvShw);
2101 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2102#ifdef PGMPOOL_WITH_USER_TRACKING
2103 pPage->cPresent = 0;
2104 pPage->iFirstPresent = ~0;
2105#endif
2106 }
2107 }
2108 /* fall thru */
2109
2110 default:
2111 Assert(!pPage->cModifications || ++cModifiedPages);
2112 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2113 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2114 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2115 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2116 pPage->cModifications = 0;
2117 break;
2118
2119 }
2120 if (!--cLeft)
2121 break;
2122 }
2123 }
2124
2125 /* sweep the special pages too. */
2126 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2127 {
2128 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2129 if (pPage->GCPhys != NIL_RTGCPHYS)
2130 {
2131 Assert(!pPage->cModifications || ++cModifiedPages);
2132 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2133 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2134 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2135 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2136 pPage->cModifications = 0;
2137 }
2138 }
2139
2140#ifndef DEBUG_michael
2141 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2142#endif
2143 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2144 pPool->cModifiedPages = 0;
2145
2146#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2147 /*
2148 * Clear all the GCPhys links and rebuild the phys ext free list.
2149 */
2150 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2151 pRam;
2152 pRam = pRam->CTX_SUFF(pNext))
2153 {
2154 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2155 while (iPage-- > 0)
2156 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
2157 }
2158
2159 pPool->iPhysExtFreeHead = 0;
2160 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2161 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2162 for (unsigned i = 0; i < cMaxPhysExts; i++)
2163 {
2164 paPhysExts[i].iNext = i + 1;
2165 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2166 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2167 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2168 }
2169 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2170#endif
2171
2172 /* Clear the PGM_SYNC_CLEAR_PGM_POOL flag on all VCPUs to prevent redundant flushes. */
2173 for (unsigned idCpu = 0; idCpu < pVM->cCPUs; idCpu++)
2174 {
2175 PVMCPU pVCpu = &pVM->aCpus[idCpu];
2176
2177 pVCpu->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2178 }
2179
2180 pPool->cPresent = 0;
2181 pgmUnlock(pVM);
2182 PGM_INVL_ALL_VCPU_TLBS(pVM);
2183 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2184 return VINF_SUCCESS;
2185}
2186#endif /* IN_RING3 */
2187
2188
2189/**
2190 * Handle SyncCR3 pool tasks
2191 *
2192 * @returns VBox status code.
2193 * @retval VINF_SUCCESS if successfully handled.
2194 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2195 * @param pVCpu The VMCPU handle.
2196 * @remark Should only be used when monitoring is available, thus placed in
2197 * the PGMPOOL_WITH_MONITORING #ifdef.
2198 */
2199int pgmPoolSyncCR3(PVMCPU pVCpu)
2200{
2201 PVM pVM = pVCpu->CTX_SUFF(pVM);
2202 LogFlow(("pgmPoolSyncCR3\n"));
2203
2204 /*
2205 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2206 * Occasionally we will have to clear all the shadow page tables because we wanted
2207 * to monitor a page which was mapped by too many shadowed page tables. This operation
2208 * is sometimes referred to as a 'lightweight flush'.
2209 */
2210# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2211 if (ASMBitTestAndClear(&pVCpu->pgm.s.fSyncFlags, PGM_SYNC_CLEAR_PGM_POOL_BIT))
2212 {
2213 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmPoolClearAll, NULL);
2214 AssertRC(rc);
2215 }
2216# else /* !IN_RING3 */
2217 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2218 {
2219 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2220 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2221 return VINF_PGM_SYNC_CR3;
2222 }
2223# endif /* !IN_RING3 */
2224 else
2225 pgmPoolMonitorModifiedClearAll(pVM);
2226
2227 return VINF_SUCCESS;
2228}
2229
2230#endif /* PGMPOOL_WITH_MONITORING */
2231#ifdef PGMPOOL_WITH_USER_TRACKING
2232
2233/**
2234 * Frees up at least one user entry.
2235 *
2236 * @returns VBox status code.
2237 * @retval VINF_SUCCESS if successfully freed.
2238 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2239 * @param pPool The pool.
2240 * @param iUser The user index.
2241 */
2242static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2243{
2244 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2245#ifdef PGMPOOL_WITH_CACHE
2246 /*
2247 * Just free cached pages in a braindead fashion.
2248 */
2249 /** @todo walk the age list backwards and free the first with usage. */
2250 int rc = VINF_SUCCESS;
2251 do
2252 {
2253 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2254 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2255 rc = rc2;
2256 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2257 return rc;
2258#else
2259 /*
2260 * Lazy approach.
2261 */
2262 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
2263 AssertCompileFailed();
2264 Assert(!CPUMIsGuestInLongMode(pVM));
2265 pgmPoolFlushAllInt(pPool);
2266 return VERR_PGM_POOL_FLUSHED;
2267#endif
2268}
2269
2270
2271/**
2272 * Inserts a page into the cache.
2273 *
2274 * This will create a user node for the page, insert it into the GCPhys
2275 * hash, and insert it into the age list.
2276 *
2277 * @returns VBox status code.
2278 * @retval VINF_SUCCESS if successfully added.
2279 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2280 * @param pPool The pool.
2281 * @param pPage The cached page.
2282 * @param GCPhys The GC physical address of the page we're going to shadow.
2283 * @param iUser The user index.
2284 * @param iUserTable The user table index.
2285 */
2286DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2287{
2288 int rc = VINF_SUCCESS;
2289 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2290
2291 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2292
2293#ifdef VBOX_STRICT
2294 /*
2295 * Check that the entry doesn't already exist.
2296 */
2297 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2298 {
2299 uint16_t i = pPage->iUserHead;
2300 do
2301 {
2302 Assert(i < pPool->cMaxUsers);
2303 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2304 i = paUsers[i].iNext;
2305 } while (i != NIL_PGMPOOL_USER_INDEX);
2306 }
2307#endif
2308
2309 /*
2310 * Find a free user node.
2311 */
2312 uint16_t i = pPool->iUserFreeHead;
2313 if (i == NIL_PGMPOOL_USER_INDEX)
2314 {
2315 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2316 if (RT_FAILURE(rc))
2317 return rc;
2318 i = pPool->iUserFreeHead;
2319 }
2320
2321 /*
2322 * Unlink the user node from the free list,
2323 * initialize and insert it into the user list.
2324 */
2325 pPool->iUserFreeHead = paUsers[i].iNext;
2326 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2327 paUsers[i].iUser = iUser;
2328 paUsers[i].iUserTable = iUserTable;
2329 pPage->iUserHead = i;
2330
2331 /*
2332 * Insert into cache and enable monitoring of the guest page if enabled.
2333 *
2334 * Until we implement caching of all levels, including the CR3 one, we'll
2335 * have to make sure we don't try monitor & cache any recursive reuse of
2336 * a monitored CR3 page. Because all Windows versions are doing this we'll
2337 * have to be able to do combined access monitoring, CR3 + PT and
2338 * PD + PT (guest PAE).
2339 *
2340 * Update:
2341 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2342 */
2343#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2344# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2345 const bool fCanBeMonitored = true;
2346# else
2347 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2348 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2349 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2350# endif
2351# ifdef PGMPOOL_WITH_CACHE
2352 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2353# endif
2354 if (fCanBeMonitored)
2355 {
2356# ifdef PGMPOOL_WITH_MONITORING
2357 rc = pgmPoolMonitorInsert(pPool, pPage);
2358 AssertRC(rc);
2359 }
2360# endif
2361#endif /* PGMPOOL_WITH_MONITORING */
2362 return rc;
2363}
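
/*
 * Illustration only: a rough sketch of how a freshly allocated shadow page
 * might be registered with the table entry that references it. The names
 * simply mirror the parameters of pgmPoolTrackInsert above and are not taken
 * from a specific caller; the surrounding allocation logic is elided.
 */
#if 0
int rc = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
if (RT_FAILURE(rc))
    /* VERR_PGM_POOL_FLUSHED: the pool had to be flushed to make room, start over. */
    Log(("tracking insert failed: %Rrc\n", rc));
#endif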
2364
2365
2366# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2367/**
2368 * Adds a user reference to a page.
2369 *
2370 * This will move the page to the head of the
2371 *
2372 * @returns VBox status code.
2373 * @retval VINF_SUCCESS if successfully added.
2374 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2375 * @param pPool The pool.
2376 * @param pPage The cached page.
2377 * @param iUser The user index.
2378 * @param iUserTable The user table.
2379 */
2380static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2381{
2382 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2383
2384 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2385
2386# ifdef VBOX_STRICT
2387 /*
2388 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2389 */
2390 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2391 {
2392 uint16_t i = pPage->iUserHead;
2393 do
2394 {
2395 Assert(i < pPool->cMaxUsers);
2396 AssertMsg(iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3 ||
2397 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2398 i = paUsers[i].iNext;
2399 } while (i != NIL_PGMPOOL_USER_INDEX);
2400 }
2401# endif
2402
2403 /*
2404 * Allocate a user node.
2405 */
2406 uint16_t i = pPool->iUserFreeHead;
2407 if (i == NIL_PGMPOOL_USER_INDEX)
2408 {
2409 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2410 if (RT_FAILURE(rc))
2411 return rc;
2412 i = pPool->iUserFreeHead;
2413 }
2414 pPool->iUserFreeHead = paUsers[i].iNext;
2415
2416 /*
2417 * Initialize the user node and insert it.
2418 */
2419 paUsers[i].iNext = pPage->iUserHead;
2420 paUsers[i].iUser = iUser;
2421 paUsers[i].iUserTable = iUserTable;
2422 pPage->iUserHead = i;
2423
2424# ifdef PGMPOOL_WITH_CACHE
2425 /*
2426 * Tell the cache to update its replacement stats for this page.
2427 */
2428 pgmPoolCacheUsed(pPool, pPage);
2429# endif
2430 return VINF_SUCCESS;
2431}
2432# endif /* PGMPOOL_WITH_CACHE */
2433
2434
2435/**
2436 * Frees a user record associated with a page.
2437 *
2438 * This does not clear the entry in the user table; it simply returns the
2439 * user record to the chain of free records.
2440 *
2441 * @param pPool The pool.
2442 * @param pPage The shadow page.
2443 * @param iUser The shadow page pool index of the user table.
2444 * @param iUserTable The index into the user table (shadowed).
2445 */
2446static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2447{
2448 /*
2449 * Unlink and free the specified user entry.
2450 */
2451 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2452
2453 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2454 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2455 uint16_t i = pPage->iUserHead;
2456 if ( i != NIL_PGMPOOL_USER_INDEX
2457 && paUsers[i].iUser == iUser
2458 && paUsers[i].iUserTable == iUserTable)
2459 {
2460 pPage->iUserHead = paUsers[i].iNext;
2461
2462 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2463 paUsers[i].iNext = pPool->iUserFreeHead;
2464 pPool->iUserFreeHead = i;
2465 return;
2466 }
2467
2468 /* General: Linear search. */
2469 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2470 while (i != NIL_PGMPOOL_USER_INDEX)
2471 {
2472 if ( paUsers[i].iUser == iUser
2473 && paUsers[i].iUserTable == iUserTable)
2474 {
2475 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2476 paUsers[iPrev].iNext = paUsers[i].iNext;
2477 else
2478 pPage->iUserHead = paUsers[i].iNext;
2479
2480 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2481 paUsers[i].iNext = pPool->iUserFreeHead;
2482 pPool->iUserFreeHead = i;
2483 return;
2484 }
2485 iPrev = i;
2486 i = paUsers[i].iNext;
2487 }
2488
2489 /* Fatal: didn't find it */
2490 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2491 iUser, iUserTable, pPage->GCPhys));
2492}
2493
2494
2495/**
2496 * Gets the entry size of a shadow table.
2497 *
2498 * @param enmKind The kind of page.
2499 *
2500 * @returns The size of the entry in bytes. That is, 4 or 8.
2501 * @returns If the kind is not for a table, an assertion is raised and 0 is
2502 * returned.
2503 */
2504DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2505{
2506 switch (enmKind)
2507 {
2508 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2509 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2510 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2511 case PGMPOOLKIND_32BIT_PD:
2512 case PGMPOOLKIND_32BIT_PD_PHYS:
2513 return 4;
2514
2515 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2516 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2517 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2518 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2519 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2520 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2521 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2522 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2523 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2524 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2525 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2526 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2527 case PGMPOOLKIND_64BIT_PML4:
2528 case PGMPOOLKIND_PAE_PDPT:
2529 case PGMPOOLKIND_ROOT_NESTED:
2530 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2531 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2532 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2533 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2534 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2535 case PGMPOOLKIND_PAE_PD_PHYS:
2536 case PGMPOOLKIND_PAE_PDPT_PHYS:
2537 return 8;
2538
2539 default:
2540 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2541 }
2542}
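
/*
 * Illustration only: the entry size returned above is what turns an entry
 * index into a byte offset within a shadow table page. A minimal sketch; the
 * iEntry parameter is hypothetical and would typically come from a user
 * record's iUserTable field.
 */
#if 0
static unsigned pgmPoolTrackEntryOffsetExample(PPGMPOOLPAGE pPage, unsigned iEntry)
{
    unsigned const cbEntry  = pgmPoolTrackGetShadowEntrySize((PGMPOOLKIND)pPage->enmKind);
    unsigned const offEntry = iEntry * cbEntry;  /* byte offset into the shadow page */
    Assert(offEntry < PAGE_SIZE);
    return offEntry;
}
#endif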
2543
2544
2545/**
2546 * Gets the entry size of a guest table.
2547 *
2548 * @param enmKind The kind of page.
2549 *
2550 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2551 * @returns If the kind is not for a table, an assertion is raised and 0 is
2552 * returned.
2553 */
2554DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2555{
2556 switch (enmKind)
2557 {
2558 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2559 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2560 case PGMPOOLKIND_32BIT_PD:
2561 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2562 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2563 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2564 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2565 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2566 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2567 return 4;
2568
2569 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2570 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2571 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2572 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2573 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2574 case PGMPOOLKIND_64BIT_PML4:
2575 case PGMPOOLKIND_PAE_PDPT:
2576 return 8;
2577
2578 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2579 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2580 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2581 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2582 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2583 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2584 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2585 case PGMPOOLKIND_ROOT_NESTED:
2586 case PGMPOOLKIND_PAE_PD_PHYS:
2587 case PGMPOOLKIND_PAE_PDPT_PHYS:
2588 case PGMPOOLKIND_32BIT_PD_PHYS:
2589 /** @todo can we return 0? (nobody is calling this...) */
2590 AssertFailed();
2591 return 0;
2592
2593 default:
2594 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2595 }
2596}
2597
2598#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2599
2600/**
2601 * Scans one shadow page table for mappings of a physical page.
2602 *
2603 * @param pVM The VM handle.
2604 * @param pPhysPage The guest page in question.
2605 * @param iShw The shadow page table.
2606 * @param cRefs The number of references made in that PT.
2607 */
2608static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2609{
2610 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2611 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2612
2613 /*
2614 * Assert sanity.
2615 */
2616 Assert(cRefs == 1);
2617 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2618 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2619
2620 /*
2621 * Then, clear the actual mappings to the page in the shadow PT.
2622 */
2623 switch (pPage->enmKind)
2624 {
2625 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2626 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2627 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2628 {
2629 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2630 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2631 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2632 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2633 {
2634 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2635 pPT->a[i].u = 0;
2636 cRefs--;
2637 if (!cRefs)
2638 return;
2639 }
2640#ifdef LOG_ENABLED
2641 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2642 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2643 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2644 {
2645 Log(("i=%d cRefs=%d\n", i, cRefs--));
2646 }
2647#endif
2648 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2649 break;
2650 }
2651
2652 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2653 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2654 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2655 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2656 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2657 {
2658 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2659 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2660 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2661 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2662 {
2663 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2664 pPT->a[i].u = 0;
2665 cRefs--;
2666 if (!cRefs)
2667 return;
2668 }
2669#ifdef LOG_ENABLED
2670 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2671 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2672 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2673 {
2674 Log(("i=%d cRefs=%d\n", i, cRefs--));
2675 }
2676#endif
2677 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2678 break;
2679 }
2680
2681 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2682 {
2683 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2684 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2685 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2686 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2687 {
2688 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2689 pPT->a[i].u = 0;
2690 cRefs--;
2691 if (!cRefs)
2692 return;
2693 }
2694#ifdef LOG_ENABLED
2695 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2696 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2697 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2698 {
2699 Log(("i=%d cRefs=%d\n", i, cRefs--));
2700 }
2701#endif
2702 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2703 break;
2704 }
2705
2706 default:
2707 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2708 }
2709}
2710
2711
2712/**
2713 * Scans one shadow page table for mappings of a physical page.
2714 *
2715 * @param pVM The VM handle.
2716 * @param pPhysPage The guest page in question.
2717 * @param iShw The shadow page table.
2718 * @param cRefs The number of references made in that PT.
2719 */
2720void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2721{
2722 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2723 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%R[pgmpage] iShw=%d cRefs=%d\n", pPhysPage, iShw, cRefs));
2724 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2725 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2726 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2727 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2728}
2729
2730
2731/**
2732 * Flushes a list of shadow page tables mapping the same physical page.
2733 *
2734 * @param pVM The VM handle.
2735 * @param pPhysPage The guest page in question.
2736 * @param iPhysExt The physical cross reference extent list to flush.
2737 */
2738void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2739{
2740 Assert(PGMIsLockOwner(pVM));
2741 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2742 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2743 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%R[pgmpage] iPhysExt\n", pPhysPage, iPhysExt));
2744
2745 const uint16_t iPhysExtStart = iPhysExt;
2746 PPGMPOOLPHYSEXT pPhysExt;
2747 do
2748 {
2749 Assert(iPhysExt < pPool->cMaxPhysExts);
2750 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2751 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2752 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2753 {
2754 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2755 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2756 }
2757
2758 /* next */
2759 iPhysExt = pPhysExt->iNext;
2760 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2761
2762 /* insert the list into the free list and clear the ram range entry. */
2763 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2764 pPool->iPhysExtFreeHead = iPhysExtStart;
2765 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2766
2767 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2768}
2769
2770#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2771
2772/**
2773 * Flushes all shadow page table mappings of the given guest page.
2774 *
2775 * This is typically called when the host page backing the guest one has been
2776 * replaced or when the page protection was changed due to an access handler.
2777 *
2778 * @returns VBox status code.
2779 * @retval VINF_SUCCESS if all references have been successfully cleared.
2780 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
2781 * pool cleaning. FF and sync flags are set.
2782 *
2783 * @param pVM The VM handle.
2784 * @param pPhysPage The guest page in question.
2785 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
2786 * flushed; it is NOT touched if this isn't necessary.
2787 * The caller MUST initialize this to @a false.
2788 */
2789int pgmPoolTrackFlushGCPhys(PVM pVM, PPGMPAGE pPhysPage, bool *pfFlushTLBs)
2790{
2791 PVMCPU pVCpu = VMMGetCpu(pVM);
2792 pgmLock(pVM);
2793 int rc = VINF_SUCCESS;
2794#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2795 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
2796 if (u16)
2797 {
2798 /*
2799 * The zero page is currently screwing up the tracking and we'll
2800 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2801 * is defined, zero pages won't normally be mapped. Some kind of solution
2802 * will be needed for this problem of course, but it will have to wait...
2803 */
2804 if (PGM_PAGE_IS_ZERO(pPhysPage))
2805 rc = VINF_PGM_GCPHYS_ALIASED;
2806 else
2807 {
2808# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2809 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
2810 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
2811 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
2812# endif
2813
2814 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
2815 pgmPoolTrackFlushGCPhysPT(pVM,
2816 pPhysPage,
2817 PGMPOOL_TD_GET_IDX(u16),
2818 PGMPOOL_TD_GET_CREFS(u16));
2819 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
2820 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, PGMPOOL_TD_GET_IDX(u16));
2821 else
2822 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
2823 *pfFlushTLBs = true;
2824
2825# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2826 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
2827# endif
2828 }
2829 }
2830
2831#elif defined(PGMPOOL_WITH_CACHE)
2832 if (PGM_PAGE_IS_ZERO(pPhysPage))
2833 rc = VINF_PGM_GCPHYS_ALIASED;
2834 else
2835 {
2836# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2837 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow may kill the pool otherwise. */
2838 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
2839# endif
2840 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
2841 if (rc == VINF_SUCCESS)
2842 *pfFlushTLBs = true;
2843# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2844 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
2845# endif
2846 }
2847
2848
2849#else
2850 rc = VINF_PGM_GCPHYS_ALIASED;
2851#endif
2852
2853 if (rc == VINF_PGM_GCPHYS_ALIASED)
2854 {
2855 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2856 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2857 rc = VINF_PGM_SYNC_CR3;
2858 }
2859 pgmUnlock(pVM);
2860 return rc;
2861}
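
/*
 * Illustration only: a minimal calling pattern consistent with the
 * documentation of pgmPoolTrackFlushGCPhys above. The caller owns the TLB
 * flush decision and must therefore initialize the flag to false itself.
 */
#if 0
bool fFlushTLBs = false;
int rc = pgmPoolTrackFlushGCPhys(pVM, pPhysPage, &fFlushTLBs);
if (fFlushTLBs)
    PGM_INVL_ALL_VCPU_TLBS(pVM);
/* rc may be VINF_PGM_SYNC_CR3, in which case the sync FF and flags have already been set. */
#endif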
2862
2863
2864/**
2865 * Scans all shadow page tables for mappings of a physical page.
2866 *
2867 * This may be slow, but it's most likely more efficient than cleaning
2868 * out the entire page pool / cache.
2869 *
2870 * @returns VBox status code.
2871 * @retval VINF_SUCCESS if all references have been successfully cleared.
2872 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2873 * a page pool cleaning.
2874 *
2875 * @param pVM The VM handle.
2876 * @param pPhysPage The guest page in question.
2877 */
2878int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2879{
2880 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2881 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2882 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
2883 pPool->cUsedPages, pPool->cPresent, pPhysPage));
2884
2885#if 1
2886 /*
2887 * There is a limit to what makes sense.
2888 */
2889 if (pPool->cPresent > 1024)
2890 {
2891 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2892 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2893 return VINF_PGM_GCPHYS_ALIASED;
2894 }
2895#endif
2896
2897 /*
2898 * Iterate all the pages until we've encountered all that are in use.
2899 * This is a simple but not quite optimal solution.
2900 */
2901 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2902 const uint32_t u32 = u64;
2903 unsigned cLeft = pPool->cUsedPages;
2904 unsigned iPage = pPool->cCurPages;
2905 while (--iPage >= PGMPOOL_IDX_FIRST)
2906 {
2907 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2908 if (pPage->GCPhys != NIL_RTGCPHYS)
2909 {
2910 switch (pPage->enmKind)
2911 {
2912 /*
2913 * We only care about shadow page tables.
2914 */
2915 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2916 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2917 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2918 {
2919 unsigned cPresent = pPage->cPresent;
2920 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2921 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2922 if (pPT->a[i].n.u1Present)
2923 {
2924 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2925 {
2926 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2927 pPT->a[i].u = 0;
2928 }
2929 if (!--cPresent)
2930 break;
2931 }
2932 break;
2933 }
2934
2935 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2936 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2937 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2938 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2939 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2940 {
2941 unsigned cPresent = pPage->cPresent;
2942 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2943 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2944 if (pPT->a[i].n.u1Present)
2945 {
2946 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2947 {
2948 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2949 pPT->a[i].u = 0;
2950 }
2951 if (!--cPresent)
2952 break;
2953 }
2954 break;
2955 }
2956 }
2957 if (!--cLeft)
2958 break;
2959 }
2960 }
2961
2962 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
2963 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2964 return VINF_SUCCESS;
2965}
2966
2967
2968/**
2969 * Clears the user entry in a user table.
2970 *
2971 * This is used to remove all references to a page when flushing it.
2972 */
2973static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2974{
2975 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2976 Assert(pUser->iUser < pPool->cCurPages);
2977 uint32_t iUserTable = pUser->iUserTable;
2978
2979 /*
2980 * Map the user page.
2981 */
2982 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2983 union
2984 {
2985 uint64_t *pau64;
2986 uint32_t *pau32;
2987 } u;
2988 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
2989
2990 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
2991
2992 /* Safety precaution in case we change the paging for other modes too in the future. */
2993 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
2994
2995#ifdef VBOX_STRICT
2996 /*
2997 * Some sanity checks.
2998 */
2999 switch (pUserPage->enmKind)
3000 {
3001 case PGMPOOLKIND_32BIT_PD:
3002 case PGMPOOLKIND_32BIT_PD_PHYS:
3003 Assert(iUserTable < X86_PG_ENTRIES);
3004 break;
3005 case PGMPOOLKIND_PAE_PDPT:
3006 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3007 case PGMPOOLKIND_PAE_PDPT_PHYS:
3008 Assert(iUserTable < 4);
3009 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3010 break;
3011 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3012 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3013 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3014 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3015 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3016 case PGMPOOLKIND_PAE_PD_PHYS:
3017 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3018 break;
3019 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3020 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3021 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3022 break;
3023 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3024 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3025 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3026 break;
3027 case PGMPOOLKIND_64BIT_PML4:
3028 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3029 /* GCPhys >> PAGE_SHIFT is the index here */
3030 break;
3031 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3032 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3033 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3034 break;
3035
3036 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3037 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3038 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3039 break;
3040
3041 case PGMPOOLKIND_ROOT_NESTED:
3042 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3043 break;
3044
3045 default:
3046 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3047 break;
3048 }
3049#endif /* VBOX_STRICT */
3050
3051 /*
3052 * Clear the entry in the user page.
3053 */
3054 switch (pUserPage->enmKind)
3055 {
3056 /* 32-bit entries */
3057 case PGMPOOLKIND_32BIT_PD:
3058 case PGMPOOLKIND_32BIT_PD_PHYS:
3059 u.pau32[iUserTable] = 0;
3060 break;
3061
3062 /* 64-bit entries */
3063 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3064 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3065 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3066 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3067 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3068#if defined(IN_RC)
3069 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during cr3 load, so any
3070 * non-present PDPT will continue to cause page faults.
3071 */
3072 ASMReloadCR3();
3073#endif
3074 /* no break */
3075 case PGMPOOLKIND_PAE_PD_PHYS:
3076 case PGMPOOLKIND_PAE_PDPT_PHYS:
3077 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3078 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3079 case PGMPOOLKIND_64BIT_PML4:
3080 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3081 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3082 case PGMPOOLKIND_PAE_PDPT:
3083 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3084 case PGMPOOLKIND_ROOT_NESTED:
3085 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3086 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3087 u.pau64[iUserTable] = 0;
3088 break;
3089
3090 default:
3091 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3092 }
3093}
3094
3095
3096/**
3097 * Clears all users of a page.
3098 */
3099static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3100{
3101 /*
3102 * Free all the user records.
3103 */
3104 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3105
3106 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3107 uint16_t i = pPage->iUserHead;
3108 while (i != NIL_PGMPOOL_USER_INDEX)
3109 {
3110 /* Clear the entry in the user table. */
3111 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3112
3113 /* Free it. */
3114 const uint16_t iNext = paUsers[i].iNext;
3115 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3116 paUsers[i].iNext = pPool->iUserFreeHead;
3117 pPool->iUserFreeHead = i;
3118
3119 /* Next. */
3120 i = iNext;
3121 }
3122 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3123}
3124
3125#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3126
3127/**
3128 * Allocates a new physical cross reference extent.
3129 *
3130 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3131 * @param pVM The VM handle.
3132 * @param piPhysExt Where to store the phys ext index.
3133 */
3134PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3135{
3136 Assert(PGMIsLockOwner(pVM));
3137 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3138 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3139 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3140 {
3141 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3142 return NULL;
3143 }
3144 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3145 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3146 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3147 *piPhysExt = iPhysExt;
3148 return pPhysExt;
3149}
3150
3151
3152/**
3153 * Frees a physical cross reference extent.
3154 *
3155 * @param pVM The VM handle.
3156 * @param iPhysExt The extent to free.
3157 */
3158void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3159{
3160 Assert(PGMIsLockOwner(pVM));
3161 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3162 Assert(iPhysExt < pPool->cMaxPhysExts);
3163 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3164 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3165 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3166 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3167 pPool->iPhysExtFreeHead = iPhysExt;
3168}
3169
3170
3171/**
3172 * Frees a list of physical cross reference extents.
3173 *
3174 * @param pVM The VM handle.
3175 * @param iPhysExt The first extent in the list to free.
3176 */
3177void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3178{
3179 Assert(PGMIsLockOwner(pVM));
3180 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3181
3182 const uint16_t iPhysExtStart = iPhysExt;
3183 PPGMPOOLPHYSEXT pPhysExt;
3184 do
3185 {
3186 Assert(iPhysExt < pPool->cMaxPhysExts);
3187 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3188 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3189 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3190
3191 /* next */
3192 iPhysExt = pPhysExt->iNext;
3193 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3194
3195 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3196 pPool->iPhysExtFreeHead = iPhysExtStart;
3197}
3198
3199
3200/**
3201 * Insert a reference into a list of physical cross reference extents.
3202 *
3203 * @returns The new tracking data for PGMPAGE.
3204 *
3205 * @param pVM The VM handle.
3206 * @param iPhysExt The physical extent index of the list head.
3207 * @param iShwPT The shadow page table index.
3208 *
3209 */
3210static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3211{
3212 Assert(PGMIsLockOwner(pVM));
3213 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3214 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3215
3216 /* special common case. */
3217 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3218 {
3219 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3220 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3221 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3222 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3223 }
3224
3225 /* general treatment. */
3226 const uint16_t iPhysExtStart = iPhysExt;
3227 unsigned cMax = 15;
3228 for (;;)
3229 {
3230 Assert(iPhysExt < pPool->cMaxPhysExts);
3231 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3232 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3233 {
3234 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3235 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3236 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3237 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3238 }
3239 if (!--cMax)
3240 {
3241 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3242 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3243 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3244 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3245 }
 /* next */
 iPhysExt = paPhysExts[iPhysExt].iNext;
 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
 break;
3246 }
3247
3248 /* add another extent to the list. */
3249 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3250 if (!pNew)
3251 {
3252 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3253 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3254 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3255 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3256 }
3257 pNew->iNext = iPhysExtStart;
3258 pNew->aidx[0] = iShwPT;
3259 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3260 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3261}
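
/*
 * Illustration only: the 16-bit tracking word built with PGMPOOL_TD_MAKE
 * above packs a reference count and an index. A minimal sketch of taking it
 * apart again, mirroring the checks done in pgmPoolTrackFlushGCPhys:
 */
#if 0
uint16_t const u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
    Log(("single tracking entry: shadow PT idx=%d cRefs=%d\n", PGMPOOL_TD_GET_IDX(u16), PGMPOOL_TD_GET_CREFS(u16)));
else if (PGMPOOL_TD_GET_IDX(u16) != PGMPOOL_TD_IDX_OVERFLOWED)
    Log(("physext list head: idx=%d\n", PGMPOOL_TD_GET_IDX(u16)));
else
    Log(("too many references, tracking overflowed\n"));
#endif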
3262
3263
3264/**
3265 * Adds a reference to a guest physical page where extents are in use.
3266 *
3267 * @returns The new tracking data for PGMPAGE.
3268 *
3269 * @param pVM The VM handle.
3270 * @param u16 The ram range flags (top 16-bits).
3271 * @param iShwPT The shadow page table index.
3272 */
3273uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3274{
3275 pgmLock(pVM);
3276 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3277 {
3278 /*
3279 * Convert to extent list.
3280 */
3281 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3282 uint16_t iPhysExt;
3283 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3284 if (pPhysExt)
3285 {
3286 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3287 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3288 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3289 pPhysExt->aidx[1] = iShwPT;
3290 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3291 }
3292 else
3293 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3294 }
3295 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3296 {
3297 /*
3298 * Insert into the extent list.
3299 */
3300 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3301 }
3302 else
3303 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3304 pgmUnlock(pVM);
3305 return u16;
3306}
3307
3308
3309/**
3310 * Clear references to guest physical memory.
3311 *
3312 * @param pPool The pool.
3313 * @param pPage The page.
3314 * @param pPhysPage Pointer to the aPages entry in the ram range.
3315 */
3316void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3317{
3318 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3319 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3320
3321 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3322 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3323 {
3324 PVM pVM = pPool->CTX_SUFF(pVM);
3325 pgmLock(pVM);
3326
3327 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3328 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3329 do
3330 {
3331 Assert(iPhysExt < pPool->cMaxPhysExts);
3332
3333 /*
3334 * Look for the shadow page and check if it's all freed.
3335 */
3336 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3337 {
3338 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3339 {
3340 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3341
3342 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3343 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3344 {
3345 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3346 pgmUnlock(pVM);
3347 return;
3348 }
3349
3350 /* we can free the node. */
3351 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3352 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3353 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3354 {
3355 /* lonely node */
3356 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3357 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3358 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3359 }
3360 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3361 {
3362 /* head */
3363 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3364 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3365 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3366 }
3367 else
3368 {
3369 /* in list */
3370 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3371 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3372 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3373 }
3374 iPhysExt = iPhysExtNext;
3375 pgmUnlock(pVM);
3376 return;
3377 }
3378 }
3379
3380 /* next */
3381 iPhysExtPrev = iPhysExt;
3382 iPhysExt = paPhysExts[iPhysExt].iNext;
3383 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3384
3385 pgmUnlock(pVM);
3386 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3387 }
3388 else /* nothing to do */
3389 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3390}
3391
3392
3393/**
3394 * Clear references to guest physical memory.
3395 *
3396 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3397 * is assumed to be correct, so the linear search can be skipped and we can assert
3398 * at an earlier point.
3399 *
3400 * @param pPool The pool.
3401 * @param pPage The page.
3402 * @param HCPhys The host physical address corresponding to the guest page.
3403 * @param GCPhys The guest physical address corresponding to HCPhys.
3404 */
3405static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3406{
3407 /*
3408 * Walk range list.
3409 */
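    /* Note on the bounds check below: each RAM range covers [GCPhys, GCPhys + cb),
       and because 'off' is unsigned, an address below the range start wraps around
       to a huge value and fails the 'off < pRam->cb' test as well. */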
3410 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3411 while (pRam)
3412 {
3413 RTGCPHYS off = GCPhys - pRam->GCPhys;
3414 if (off < pRam->cb)
3415 {
3416 /* does it match? */
3417 const unsigned iPage = off >> PAGE_SHIFT;
3418 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3419#ifdef LOG_ENABLED
3420            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3421            Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3422#endif
3423 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3424 {
3425 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3426 return;
3427 }
3428 break;
3429 }
3430 pRam = pRam->CTX_SUFF(pNext);
3431 }
3432 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3433}
3434
3435
3436/**
3437 * Clear references to guest physical memory.
3438 *
3439 * @param pPool The pool.
3440 * @param pPage The page.
3441 * @param HCPhys The host physical address corresponding to the guest page.
3442 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3443 */
3444static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3445{
3446 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3447
3448 /*
3449 * Walk range list.
3450 */
3451 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3452 while (pRam)
3453 {
3454 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3455 if (off < pRam->cb)
3456 {
3457 /* does it match? */
3458 const unsigned iPage = off >> PAGE_SHIFT;
3459 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3460 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3461 {
3462 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3463 return;
3464 }
3465 break;
3466 }
3467 pRam = pRam->CTX_SUFF(pNext);
3468 }
3469
3470 /*
3471 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3472 */
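    /* Presumably we get here when the guest has changed the PTE we took the hint
       from since the shadow entry was created, so the hinted GCPhys no longer maps
       to this HCPhys; fall back to scanning every RAM range. */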
3473 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3474 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3475 while (pRam)
3476 {
3477 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3478 while (iPage-- > 0)
3479 {
3480 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3481 {
3482 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3483 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3484 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3485 return;
3486 }
3487 }
3488 pRam = pRam->CTX_SUFF(pNext);
3489 }
3490
3491 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3492}
3493
3494
3495/**
3496 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3497 *
3498 * @param pPool The pool.
3499 * @param pPage The page.
3500 * @param pShwPT The shadow page table (mapping of the page).
3501 * @param pGstPT The guest page table.
3502 */
3503DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3504{
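    /* Reading note: iFirstPresent lets us skip leading empty slots, and the
       cPresent countdown below lets us stop as soon as the last present PTE
       has been dereferenced. */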
3505 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3506 if (pShwPT->a[i].n.u1Present)
3507 {
3508 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3509 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3510 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3511 if (!--pPage->cPresent)
3512 break;
3513 }
3514}
3515
3516
3517/**
3518 * Clear references to guest physical memory in a PAE / 32-bit page table.
3519 *
3520 * @param pPool The pool.
3521 * @param pPage The page.
3522 * @param pShwPT The shadow page table (mapping of the page).
3523 * @param pGstPT The guest page table (just a half one).
3524 */
3525DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3526{
3527 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3528 if (pShwPT->a[i].n.u1Present)
3529 {
3530 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3531 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3532 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3533 }
3534}
3535
3536
3537/**
3538 * Clear references to guest physical memory in a PAE / PAE page table.
3539 *
3540 * @param pPool The pool.
3541 * @param pPage The page.
3542 * @param pShwPT The shadow page table (mapping of the page).
3543 * @param pGstPT The guest page table.
3544 */
3545DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3546{
3547 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3548 if (pShwPT->a[i].n.u1Present)
3549 {
3550            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3551 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3552 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3553 }
3554}
3555
3556
3557/**
3558 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3559 *
3560 * @param pPool The pool.
3561 * @param pPage The page.
3562 * @param pShwPT The shadow page table (mapping of the page).
3563 */
3564DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3565{
3566 RTGCPHYS GCPhys = pPage->GCPhys;
3567 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3568 if (pShwPT->a[i].n.u1Present)
3569 {
3570 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3571 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3572 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3573 }
3574}
3575
3576
3577/**
3578 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3579 *
3580 * @param pPool The pool.
3581 * @param pPage The page.
3582 * @param pShwPT The shadow page table (mapping of the page).
3583 */
3584DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3585{
3586 RTGCPHYS GCPhys = pPage->GCPhys;
3587 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3588 if (pShwPT->a[i].n.u1Present)
3589 {
3590 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3591 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3592 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3593 }
3594}
3595
3596#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3597
3598
3599/**
3600 * Clear references to shadowed pages in a 32-bit page directory.
3601 *
3602 * @param pPool The pool.
3603 * @param pPage The page.
3604 * @param pShwPD The shadow page directory (mapping of the page).
3605 */
3606DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3607{
3608 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3609 {
3610 if ( pShwPD->a[i].n.u1Present
3611 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3612 )
3613 {
3614 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3615 if (pSubPage)
3616 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3617 else
3618 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3619 }
3620 }
3621}
3622
3623/**
3624 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
3625 *
3626 * @param pPool The pool.
3627 * @param pPage The page.
3628 * @param pShwPD The shadow page directory (mapping of the page).
3629 */
3630DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3631{
3632 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3633 {
3634 if ( pShwPD->a[i].n.u1Present
3635 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3636 )
3637 {
3638 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3639 if (pSubPage)
3640 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3641 else
3642 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3643 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3644 }
3645 }
3646}
3647
3648/**
3649 * Clear references to shadowed pages in a PAE page directory pointer table.
3650 *
3651 * @param pPool The pool.
3652 * @param pPage The page.
3653 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3654 */
3655DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3656{
3657 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
3658 {
3659 if ( pShwPDPT->a[i].n.u1Present
3660 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3661 )
3662 {
3663 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3664 if (pSubPage)
3665 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3666 else
3667 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3668 }
3669 }
3670}
3671
3672
3673/**
3674 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3675 *
3676 * @param pPool The pool.
3677 * @param pPage The page.
3678 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3679 */
3680DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3681{
3682 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3683 {
3684 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
3685 if (pShwPDPT->a[i].n.u1Present)
3686 {
3687 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3688 if (pSubPage)
3689 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3690 else
3691 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3692 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3693 }
3694 }
3695}
3696
3697
3698/**
3699 * Clear references to shadowed pages in a 64-bit level 4 page table.
3700 *
3701 * @param pPool The pool.
3702 * @param pPage The page.
3703 * @param pShwPML4 The shadow PML4 table (mapping of the page).
3704 */
3705DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3706{
3707 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3708 {
3709 if (pShwPML4->a[i].n.u1Present)
3710 {
3711 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3712 if (pSubPage)
3713 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3714 else
3715 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3716 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3717 }
3718 }
3719}
3720
3721
3722/**
3723 * Clear references to shadowed pages in an EPT page table.
3724 *
3725 * @param pPool The pool.
3726 * @param pPage The page.
3727 * @param pShwPT The shadow page table (mapping of the page).
3728 */
3729DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3730{
3731 RTGCPHYS GCPhys = pPage->GCPhys;
3732 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3733 if (pShwPT->a[i].n.u1Present)
3734 {
3735 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3736                  i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3737 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3738 }
3739}
3740
3741
3742/**
3743 * Clear references to shadowed pages in an EPT page directory.
3744 *
3745 * @param pPool The pool.
3746 * @param pPage The page.
3747 * @param pShwPD The shadow page directory (mapping of the page).
3748 */
3749DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3750{
3751 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3752 {
3753 if (pShwPD->a[i].n.u1Present)
3754 {
3755 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3756 if (pSubPage)
3757 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3758 else
3759 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3760 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3761 }
3762 }
3763}
3764
3765
3766/**
3767 * Clear references to shadowed pages in an EPT page directory pointer table.
3768 *
3769 * @param pPool The pool.
3770 * @param pPage The page.
3771 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3772 */
3773DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3774{
3775 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3776 {
3777 if (pShwPDPT->a[i].n.u1Present)
3778 {
3779 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3780 if (pSubPage)
3781 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3782 else
3783 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3784 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3785 }
3786 }
3787}
3788
3789
3790/**
3791 * Clears all references made by this page.
3792 *
3793 * This includes other shadow pages and GC physical addresses.
3794 *
3795 * @param pPool The pool.
3796 * @param pPage The page.
3797 */
3798static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3799{
3800 /*
3801 * Map the shadow page and take action according to the page kind.
3802 */
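    /*
     * Rough map of the dispatch below: the page-table kinds drop their references
     * to guest physical pages (via the DerefPT* workers), while the directory
     * kinds - PD, PDPT and PML4, EPT included - release the pool sub-pages their
     * present entries point to.
     */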
3803 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
3804 switch (pPage->enmKind)
3805 {
3806#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3807 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3808 {
3809 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3810 void *pvGst;
3811 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3812 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3813 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3814 break;
3815 }
3816
3817 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3818 {
3819 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3820 void *pvGst;
3821 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3822 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3823 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3824 break;
3825 }
3826
3827 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3828 {
3829 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3830 void *pvGst;
3831 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3832 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3833 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3834 break;
3835 }
3836
3837 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3838 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3839 {
3840 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3841 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3842 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3843 break;
3844 }
3845
3846 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3847 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3848 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3849 {
3850 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3851 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3852 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3853 break;
3854 }
3855
3856#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3857 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3858 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3859 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3860 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3861 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3862 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3863 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3864 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3865 break;
3866#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3867
3868 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3869 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3870 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3871 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3872 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3873 case PGMPOOLKIND_PAE_PD_PHYS:
3874 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3875 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3876 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3877 break;
3878
3879 case PGMPOOLKIND_32BIT_PD_PHYS:
3880 case PGMPOOLKIND_32BIT_PD:
3881 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
3882 break;
3883
3884 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3885 case PGMPOOLKIND_PAE_PDPT:
3886 case PGMPOOLKIND_PAE_PDPT_PHYS:
3887 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
3888 break;
3889
3890 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3891 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3892 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3893 break;
3894
3895 case PGMPOOLKIND_64BIT_PML4:
3896 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3897 break;
3898
3899 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3900 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3901 break;
3902
3903 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3904 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3905 break;
3906
3907 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3908 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3909 break;
3910
3911 default:
3912 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3913 }
3914
3915    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3916 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3917 ASMMemZeroPage(pvShw);
3918 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3919 pPage->fZeroed = true;
3920 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
3921}
3922#endif /* PGMPOOL_WITH_USER_TRACKING */
3923
3924/**
3925 * Flushes a pool page.
3926 *
3927 * This moves the page to the free list after removing all user references to it.
3928 *
3929 * @returns VBox status code.
3930 * @retval VINF_SUCCESS on success.
3931 * @param pPool The pool.
3932 * @param pPage The shadow page.
3933 */
3934int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3935{
3936 PVM pVM = pPool->CTX_SUFF(pVM);
3937
3938 int rc = VINF_SUCCESS;
3939 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3940 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
3941 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
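    /*
     * Outline of the flush below (mirrors the code, nothing extra): reject special
     * root pages and the locked shadow CR3 pages, clear the user references and
     * guest-physical tracking, evict the page from the cache, deregister any write
     * monitoring, and finally put the page back on the free list.
     */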
3942
3943 /*
3944 * Quietly reject any attempts at flushing any of the special root pages.
3945 */
3946 if (pPage->idx < PGMPOOL_IDX_FIRST)
3947 {
3948 AssertFailed(); /* can no longer happen */
3949 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
3950 return VINF_SUCCESS;
3951 }
3952
3953 pgmLock(pVM);
3954
3955 /*
3956 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
3957 */
3958 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
3959 {
3960 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
3961 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
3962 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
3963 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
3964 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
3965 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
3966 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
3967 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
3968 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
3969 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
3970 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
3971 pgmUnlock(pVM);
3972 return VINF_SUCCESS;
3973 }
3974
3975#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3976 /* Start a subset so we won't run out of mapping space. */
3977 PVMCPU pVCpu = VMMGetCpu(pVM);
3978 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3979#endif
3980
3981 /*
3982 * Mark the page as being in need of a ASMMemZeroPage().
3983 */
3984 pPage->fZeroed = false;
3985
3986#ifdef PGMPOOL_WITH_USER_TRACKING
3987 /*
3988 * Clear the page.
3989 */
3990 pgmPoolTrackClearPageUsers(pPool, pPage);
3991 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3992 pgmPoolTrackDeref(pPool, pPage);
3993 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3994#endif
3995
3996#ifdef PGMPOOL_WITH_CACHE
3997 /*
3998 * Flush it from the cache.
3999 */
4000 pgmPoolCacheFlushPage(pPool, pPage);
4001#endif /* PGMPOOL_WITH_CACHE */
4002
4003#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4004 /* Heavy stuff done. */
4005 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4006#endif
4007
4008#ifdef PGMPOOL_WITH_MONITORING
4009 /*
4010     * Deregister the monitoring.
4011 */
4012 if (pPage->fMonitored)
4013 rc = pgmPoolMonitorFlush(pPool, pPage);
4014#endif
4015
4016 /*
4017 * Free the page.
4018 */
4019 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4020 pPage->iNext = pPool->iFreeHead;
4021 pPool->iFreeHead = pPage->idx;
4022 pPage->enmKind = PGMPOOLKIND_FREE;
4023 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4024 pPage->GCPhys = NIL_RTGCPHYS;
4025 pPage->fReusedFlushPending = false;
4026
4027 pPool->cUsedPages--;
4028 pgmUnlock(pVM);
4029 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4030 return rc;
4031}
4032
4033
4034/**
4035 * Frees a usage of a pool page.
4036 *
4037 * The caller is responsible for updating the user table so that it no longer
4038 * references the shadow page.
4039 *
4040 * @param pPool The pool.
4041 * @param pPage The shadow page.
4042 * @param iUser The shadow page pool index of the user table.
4043 * @param iUserTable The index into the user table (shadowed).
4044 */
4045void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4046{
4047 PVM pVM = pPool->CTX_SUFF(pVM);
4048
4049 STAM_PROFILE_START(&pPool->StatFree, a);
4050 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4051 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4052 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4053 pgmLock(pVM);
4054#ifdef PGMPOOL_WITH_USER_TRACKING
4055 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4056#endif
4057#ifdef PGMPOOL_WITH_CACHE
4058 if (!pPage->fCached)
4059#endif
4060 pgmPoolFlushPage(pPool, pPage);
4061 pgmUnlock(pVM);
4062 STAM_PROFILE_STOP(&pPool->StatFree, a);
4063}
4064
4065
4066/**
4067 * Makes one or more free pages available.
4068 *
4069 * @returns VBox status code.
4070 * @retval VINF_SUCCESS on success.
4071 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4072 *
4073 * @param pPool The pool.
4074 * @param enmKind The page table kind.
4075 * @param iUser The user of the page.
4076 */
4077static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4078{
4079 PVM pVM = pPool->CTX_SUFF(pVM);
4080
4081 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4082
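    /*
     * Strategy (as implemented below): if the pool is not yet fully grown, try to
     * grow it - directly in ring-3, via a ring-3 call otherwise - and return once
     * the free list is non-empty; failing that, fall back to evicting one cached
     * page (or, without the cache, flushing the whole pool).
     */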
4083 /*
4084 * If the pool isn't full grown yet, expand it.
4085 */
4086 if ( pPool->cCurPages < pPool->cMaxPages
4087#if defined(IN_RC)
4088 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4089 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4090 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4091#endif
4092 )
4093 {
4094 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4095#ifdef IN_RING3
4096 int rc = PGMR3PoolGrow(pVM);
4097#else
4098 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4099#endif
4100 if (RT_FAILURE(rc))
4101 return rc;
4102 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4103 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4104 return VINF_SUCCESS;
4105 }
4106
4107#ifdef PGMPOOL_WITH_CACHE
4108 /*
4109 * Free one cached page.
4110 */
4111 return pgmPoolCacheFreeOne(pPool, iUser);
4112#else
4113 /*
4114 * Flush the pool.
4115 *
4116 * If we have tracking enabled, it should be possible to come up with
4117 * a cheap replacement strategy...
4118 */
4119 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
4120 AssertCompileFailed();
4121 Assert(!CPUMIsGuestInLongMode(pVM));
4122 pgmPoolFlushAllInt(pPool);
4123 return VERR_PGM_POOL_FLUSHED;
4124#endif
4125}
4126
4127/**
4128 * Allocates a page from the pool.
4129 *
4130 * This page may actually be a cached page and not in need of any processing
4131 * on the callers part.
4132 *
4133 * @returns VBox status code.
4134 * @retval VINF_SUCCESS if a NEW page was allocated.
4135 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4136 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4137 * @param pVM The VM handle.
4138 * @param GCPhys The GC physical address of the page we're going to shadow.
4139 * For 4MB and 2MB PD entries, it's the first address the
4140 * shadow PT is covering.
4141 * @param enmKind The kind of mapping.
4142 * @param enmAccess Access type for the mapping (only relevant for big pages)
4143 * @param iUser The shadow page pool index of the user table.
4144 * @param iUserTable The index into the user table (shadowed).
4145 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4146 * @param fLockPage Lock the page
4147 */
4148int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4149{
4150 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4151 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4152 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4153 *ppPage = NULL;
4154 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4155 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4156 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4157
4158 pgmLock(pVM);
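    /*
     * Allocation outline (a summary of the code below): try the cache first when
     * it is enabled; otherwise take the head of the free list - replenishing it
     * via pgmPoolMakeMoreFreePages if it is empty - initialise the new page,
     * insert it into the user tracking, and zero it unless it is still fZeroed.
     */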
4159
4160#ifdef PGMPOOL_WITH_CACHE
4161 if (pPool->fCacheEnabled)
4162 {
4163 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4164 if (RT_SUCCESS(rc2))
4165 {
4166 if (fLockPage)
4167 pgmPoolLockPage(pPool, *ppPage);
4168 pgmUnlock(pVM);
4169 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4170 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4171 return rc2;
4172 }
4173 }
4174#endif
4175
4176 /*
4177 * Allocate a new one.
4178 */
4179 int rc = VINF_SUCCESS;
4180 uint16_t iNew = pPool->iFreeHead;
4181 if (iNew == NIL_PGMPOOL_IDX)
4182 {
4183 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4184 if (RT_FAILURE(rc))
4185 {
4186 pgmUnlock(pVM);
4187 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4188 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4189 return rc;
4190 }
4191 iNew = pPool->iFreeHead;
4192 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4193 }
4194
4195 /* unlink the free head */
4196 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4197 pPool->iFreeHead = pPage->iNext;
4198 pPage->iNext = NIL_PGMPOOL_IDX;
4199
4200 /*
4201 * Initialize it.
4202 */
4203 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4204 pPage->enmKind = enmKind;
4205 pPage->enmAccess = enmAccess;
4206 pPage->GCPhys = GCPhys;
4207 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4208 pPage->fMonitored = false;
4209 pPage->fCached = false;
4210 pPage->fReusedFlushPending = false;
4211#ifdef PGMPOOL_WITH_MONITORING
4212 pPage->cModifications = 0;
4213 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4214 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4215#else
4216 pPage->fCR3Mix = false;
4217#endif
4218#ifdef PGMPOOL_WITH_USER_TRACKING
4219 pPage->cPresent = 0;
4220 pPage->iFirstPresent = ~0;
4221 pPage->pvLastAccessHandlerFault = 0;
4222 pPage->cLastAccessHandlerCount = 0;
4223 pPage->pvLastAccessHandlerRip = 0;
4224
4225 /*
4226 * Insert into the tracking and cache. If this fails, free the page.
4227 */
4228 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4229 if (RT_FAILURE(rc3))
4230 {
4231 pPool->cUsedPages--;
4232 pPage->enmKind = PGMPOOLKIND_FREE;
4233 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4234 pPage->GCPhys = NIL_RTGCPHYS;
4235 pPage->iNext = pPool->iFreeHead;
4236 pPool->iFreeHead = pPage->idx;
4237 pgmUnlock(pVM);
4238 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4239 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4240 return rc3;
4241 }
4242#endif /* PGMPOOL_WITH_USER_TRACKING */
4243
4244 /*
4245 * Commit the allocation, clear the page and return.
4246 */
4247#ifdef VBOX_WITH_STATISTICS
4248 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4249 pPool->cUsedPagesHigh = pPool->cUsedPages;
4250#endif
4251
4252 if (!pPage->fZeroed)
4253 {
4254 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4255 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4256 ASMMemZeroPage(pv);
4257 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4258 }
4259
4260 *ppPage = pPage;
4261 if (fLockPage)
4262 pgmPoolLockPage(pPool, pPage);
4263 pgmUnlock(pVM);
4264 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4265 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4266 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4267 return rc;
4268}
4269
4270
4271/**
4272 * Frees a usage of a pool page.
4273 *
4274 * @param pVM The VM handle.
4275 * @param HCPhys The HC physical address of the shadow page.
4276 * @param iUser The shadow page pool index of the user table.
4277 * @param iUserTable The index into the user table (shadowed).
4278 */
4279void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4280{
4281 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4282 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4283 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4284}
4285
4286/**
4287 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4288 *
4289 * @returns Pointer to the shadow page structure.
4290 * @param pPool The pool.
4291 * @param HCPhys The HC physical address of the shadow page.
4292 */
4293PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4294{
4295 PVM pVM = pPool->CTX_SUFF(pVM);
4296
4297 Assert(PGMIsLockOwner(pVM));
4298
4299 /*
4300 * Look up the page.
4301 */
4302 pgmLock(pVM);
4303 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4304 pgmUnlock(pVM);
4305
4306 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4307 return pPage;
4308}
4309
4310
4311#ifdef IN_RING3
4312/**
4313 * Resets the entire shadow page pool, flushing all pages.
4314 *
4315 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4316 * and will execute the CR3 flush.
4317 *
4318 * @param pVM The VM handle.
4319 */
4320void pgmR3PoolReset(PVM pVM)
4321{
4322 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4323
4324 Assert(PGMIsLockOwner(pVM));
4325 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
4326    LogFlow(("pgmR3PoolReset:\n"));
4327
4328 /*
4329 * If there are no pages in the pool, there is nothing to do.
4330 */
4331 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4332 {
4333 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4334 return;
4335 }
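    /*
     * Reset outline (mirrors the steps below): leave shadow mode on every VCPU,
     * rebuild the free list from scratch, reset the user, phys-ext, modified-page
     * and cache bookkeeping, refresh the access handlers of the monitored special
     * pages, then re-enter shadow mode and force a CR3 sync on every VCPU.
     */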
4336
4337 /*
4338 * Exit the shadow mode since we're going to clear everything,
4339 * including the root page.
4340 */
4341    for (unsigned i = 0; i < pVM->cCPUs; i++)
4342 {
4343 PVMCPU pVCpu = &pVM->aCpus[i];
4344 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4345 }
4346
4347 /*
4348 * Nuke the free list and reinsert all pages into it.
4349 */
4350 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4351 {
4352 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4353
4354 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4355#ifdef PGMPOOL_WITH_MONITORING
4356 if (pPage->fMonitored)
4357 pgmPoolMonitorFlush(pPool, pPage);
4358 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4359 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4360 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4361 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4362 pPage->cModifications = 0;
4363#endif
4364 pPage->GCPhys = NIL_RTGCPHYS;
4365 pPage->enmKind = PGMPOOLKIND_FREE;
4366 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4367 Assert(pPage->idx == i);
4368 pPage->iNext = i + 1;
4369 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4370 pPage->fSeenNonGlobal = false;
4371 pPage->fMonitored = false;
4372 pPage->fCached = false;
4373 pPage->fReusedFlushPending = false;
4374#ifdef PGMPOOL_WITH_USER_TRACKING
4375 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4376#else
4377 pPage->fCR3Mix = false;
4378#endif
4379#ifdef PGMPOOL_WITH_CACHE
4380 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4381 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4382#endif
4383 pPage->cLocked = 0;
4384 }
4385 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4386 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4387 pPool->cUsedPages = 0;
4388
4389#ifdef PGMPOOL_WITH_USER_TRACKING
4390 /*
4391 * Zap and reinitialize the user records.
4392 */
4393 pPool->cPresent = 0;
4394 pPool->iUserFreeHead = 0;
4395 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4396 const unsigned cMaxUsers = pPool->cMaxUsers;
4397 for (unsigned i = 0; i < cMaxUsers; i++)
4398 {
4399 paUsers[i].iNext = i + 1;
4400 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4401 paUsers[i].iUserTable = 0xfffffffe;
4402 }
4403 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4404#endif
4405
4406#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4407 /*
4408 * Clear all the GCPhys links and rebuild the phys ext free list.
4409 */
4410 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4411 pRam;
4412 pRam = pRam->CTX_SUFF(pNext))
4413 {
4414 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4415 while (iPage-- > 0)
4416 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4417 }
4418
4419 pPool->iPhysExtFreeHead = 0;
4420 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4421 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4422 for (unsigned i = 0; i < cMaxPhysExts; i++)
4423 {
4424 paPhysExts[i].iNext = i + 1;
4425 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4426 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4427 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4428 }
4429 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4430#endif
4431
4432#ifdef PGMPOOL_WITH_MONITORING
4433 /*
4434 * Just zap the modified list.
4435 */
4436 pPool->cModifiedPages = 0;
4437 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4438#endif
4439
4440#ifdef PGMPOOL_WITH_CACHE
4441 /*
4442 * Clear the GCPhys hash and the age list.
4443 */
4444 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4445 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4446 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4447 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4448#endif
4449
4450 /*
4451 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4452 */
4453 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4454 {
4455 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4456 pPage->iNext = NIL_PGMPOOL_IDX;
4457#ifdef PGMPOOL_WITH_MONITORING
4458 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4459 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4460 pPage->cModifications = 0;
4461 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4462 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4463 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4464 if (pPage->fMonitored)
4465 {
4466 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4467 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4468 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4469 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4470 pPool->pszAccessHandler);
4471 AssertFatalRCSuccess(rc);
4472# ifdef PGMPOOL_WITH_CACHE
4473 pgmPoolHashInsert(pPool, pPage);
4474# endif
4475 }
4476#endif
4477#ifdef PGMPOOL_WITH_USER_TRACKING
4478 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4479#endif
4480#ifdef PGMPOOL_WITH_CACHE
4481 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4482 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4483#endif
4484 }
4485
4486    for (unsigned i = 0; i < pVM->cCPUs; i++)
4487 {
4488 PVMCPU pVCpu = &pVM->aCpus[i];
4489 /*
4490 * Re-enter the shadowing mode and assert Sync CR3 FF.
4491 */
4492 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
4493 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4494 }
4495
4496 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4497}
4498#endif /* IN_RING3 */
4499
4500#ifdef LOG_ENABLED
4501static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4502{
4503 switch(enmKind)
4504 {
4505 case PGMPOOLKIND_INVALID:
4506 return "PGMPOOLKIND_INVALID";
4507 case PGMPOOLKIND_FREE:
4508 return "PGMPOOLKIND_FREE";
4509 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4510 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4511 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4512 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4513 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4514 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4515 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4516 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4517 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4518 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4519 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4520 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4521 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4522 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4523 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4524 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4525 case PGMPOOLKIND_32BIT_PD:
4526 return "PGMPOOLKIND_32BIT_PD";
4527 case PGMPOOLKIND_32BIT_PD_PHYS:
4528 return "PGMPOOLKIND_32BIT_PD_PHYS";
4529 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4530 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4531 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4532 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4533 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4534 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4535 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4536 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4537 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4538 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4539 case PGMPOOLKIND_PAE_PD_PHYS:
4540 return "PGMPOOLKIND_PAE_PD_PHYS";
4541 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4542 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4543 case PGMPOOLKIND_PAE_PDPT:
4544 return "PGMPOOLKIND_PAE_PDPT";
4545 case PGMPOOLKIND_PAE_PDPT_PHYS:
4546 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4547 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4548 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4549 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4550 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4551 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4552 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4553 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4554 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
4555 case PGMPOOLKIND_64BIT_PML4:
4556 return "PGMPOOLKIND_64BIT_PML4";
4557 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4558 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
4559 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4560 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
4561 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4562 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
4563 case PGMPOOLKIND_ROOT_NESTED:
4564 return "PGMPOOLKIND_ROOT_NESTED";
4565 }
4566 return "Unknown kind!";
4567}
4568#endif /* LOG_ENABLED */