VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 16468

Last change on this file since 16468 was 16428, checked in by vboxsync, 16 years ago

VBOX_WITH_PGMPOOL_PAGING_ONLY: cleaned up

1/* $Id: PGMAllPool.cpp 16428 2009-01-30 16:49:19Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42
43
44/*******************************************************************************
45* Internal Functions *
46*******************************************************************************/
47__BEGIN_DECLS
48static void pgmPoolFlushAllInt(PPGMPOOL pPool);
49#ifdef PGMPOOL_WITH_USER_TRACKING
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#endif
54#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
55static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
56#endif
57#ifdef PGMPOOL_WITH_CACHE
58static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
59#endif
60#ifdef PGMPOOL_WITH_MONITORING
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#endif
63#ifndef IN_RING3
64DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
65#endif
66__END_DECLS
67
68
69/**
70 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
71 *
72 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
73 * @param enmKind The page kind.
74 */
75DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
76{
77 switch (enmKind)
78 {
79 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
82 return true;
83 default:
84 return false;
85 }
86}
87
88
89#if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
90/**
91 * Maps a pool page into the current context.
92 *
93 * @returns Pointer to the mapping.
94 * @param pPGM Pointer to the PGM instance data.
95 * @param pPage The page to map.
96 */
97void *pgmPoolMapPageFallback(PPGM pPGM, PPGMPOOLPAGE pPage)
98{
99 /* general pages are taken care of by the inlined part; it
100 only ends up here in case of failure. */
101 AssertReleaseReturn(pPage->idx < PGMPOOL_IDX_FIRST, NULL);
102
103/** @todo make sure HCPhys is valid for *all* indexes. */
104 /* special pages. */
105# ifdef IN_RC
106 switch (pPage->idx)
107 {
108# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
109 case PGMPOOL_IDX_PD:
110 case PGMPOOL_IDX_PDPT:
111 case PGMPOOL_IDX_AMD64_CR3:
112 return pPGM->pShwRootRC;
113# else
114 case PGMPOOL_IDX_PD:
115 return pPGM->pShw32BitPdRC;
116 case PGMPOOL_IDX_PAE_PD:
117 case PGMPOOL_IDX_PAE_PD_0:
118 return pPGM->apShwPaePDsRC[0];
119 case PGMPOOL_IDX_PAE_PD_1:
120 return pPGM->apShwPaePDsRC[1];
121 case PGMPOOL_IDX_PAE_PD_2:
122 return pPGM->apShwPaePDsRC[2];
123 case PGMPOOL_IDX_PAE_PD_3:
124 return pPGM->apShwPaePDsRC[3];
125 case PGMPOOL_IDX_PDPT:
126 return pPGM->pShwPaePdptRC;
127# endif
128 default:
129 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
130 return NULL;
131 }
132
133# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
134 RTHCPHYS HCPhys;
135 switch (pPage->idx)
136 {
137# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
138 case PGMPOOL_IDX_PD:
139 case PGMPOOL_IDX_PDPT:
140 case PGMPOOL_IDX_AMD64_CR3:
141 HCPhys = pPGM->HCPhysShwCR3;
142 break;
143
144 case PGMPOOL_IDX_NESTED_ROOT:
145 HCPhys = pPGM->HCPhysShwNestedRoot;
146 break;
147# else
148 case PGMPOOL_IDX_PD:
149 HCPhys = pPGM->HCPhysShw32BitPD;
150 break;
151 case PGMPOOL_IDX_PAE_PD_0:
152 HCPhys = pPGM->aHCPhysPaePDs[0];
153 break;
154 case PGMPOOL_IDX_PAE_PD_1:
155 HCPhys = pPGM->aHCPhysPaePDs[1];
156 break;
157 case PGMPOOL_IDX_PAE_PD_2:
158 HCPhys = pPGM->aHCPhysPaePDs[2];
159 break;
160 case PGMPOOL_IDX_PAE_PD_3:
161 HCPhys = pPGM->aHCPhysPaePDs[3];
162 break;
163 case PGMPOOL_IDX_PDPT:
164 HCPhys = pPGM->HCPhysShwPaePdpt;
165 break;
166 case PGMPOOL_IDX_NESTED_ROOT:
167 HCPhys = pPGM->HCPhysShwNestedRoot;
168 break;
169 case PGMPOOL_IDX_PAE_PD:
170 AssertReleaseMsgFailed(("PGMPOOL_IDX_PAE_PD is not usable in VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 context\n"));
171 return NULL;
172# endif
173 default:
174 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
175 return NULL;
176 }
177 AssertMsg(HCPhys && HCPhys != NIL_RTHCPHYS && !(PAGE_OFFSET_MASK & HCPhys), ("%RHp\n", HCPhys));
178
179 void *pv;
180 pgmR0DynMapHCPageInlined(pPGM, HCPhys, &pv);
181 return pv;
182# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
183}
184#endif /* IN_RC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
185
186
187#ifdef PGMPOOL_WITH_MONITORING
188/**
189 * Determine the size of a write instruction.
190 * @returns number of bytes written.
191 * @param pDis The disassembler state.
192 */
193static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
194{
195 /*
196 * This is very crude and possibly wrong for some opcodes,
197 * but since it's not really supposed to be called we can
198 * probably live with that.
199 */
200 return DISGetParamSize(pDis, &pDis->param1);
201}
202
203
204/**
205 * Flushes a chain of pages sharing the same access monitor.
206 *
207 * @returns VBox status code suitable for scheduling.
208 * @param pPool The pool.
209 * @param pPage A page in the chain.
210 */
211int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
212{
213 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
214
215 /*
216 * Find the list head.
217 */
218 uint16_t idx = pPage->idx;
219 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
220 {
221 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
222 {
223 idx = pPage->iMonitoredPrev;
224 Assert(idx != pPage->idx);
225 pPage = &pPool->aPages[idx];
226 }
227 }
228
229 /*
230 * Iterate the list flushing each shadow page.
231 */
232 int rc = VINF_SUCCESS;
233 for (;;)
234 {
235 idx = pPage->iMonitoredNext;
236 Assert(idx != pPage->idx);
237 if (pPage->idx >= PGMPOOL_IDX_FIRST)
238 {
239 int rc2 = pgmPoolFlushPage(pPool, pPage);
240 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
241 rc = VINF_PGM_SYNC_CR3;
242 }
243 /* next */
244 if (idx == NIL_PGMPOOL_IDX)
245 break;
246 pPage = &pPool->aPages[idx];
247 }
248 return rc;
249}
250
251
252/**
253 * Wrapper for getting the current context pointer to the entry being modified.
254 *
255 * @returns Pointer to the current context mapping of the entry.
256 * @param pPool The pool.
257 * @param pvFault The fault virtual address.
258 * @param GCPhysFault The fault physical address.
259 * @param cbEntry The entry size.
260 */
261#ifdef IN_RING3
262DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
263#else
264DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
265#endif
266{
267#ifdef IN_RC
268 return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
269
270#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
271 void *pvRet;
272 int rc = PGMDynMapGCPageOff(pPool->pVMR0, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
273 AssertFatalRCSuccess(rc);
274 return pvRet;
275
276#elif defined(IN_RING0)
277 void *pvRet;
278 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMR0->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
279 AssertFatalRCSuccess(rc);
280 return pvRet;
281
282#elif defined(IN_RING3)
283 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
284#else
285# error "huh?"
286#endif
287}
288
289
290/**
291 * Process shadow entries before they are changed by the guest.
292 *
293 * For PT entries we will clear them. For PD entries, we'll simply check
294 * for mapping conflicts and set the SyncCR3 FF if found.
295 *
296 * @param pPool The pool.
297 * @param pPage The head page.
298 * @param GCPhysFault The guest physical fault address.
299 * @param uAddress In R0 and GC this is the guest context fault address (flat).
300 * In R3 this is the host context 'fault' address.
301 * @param pCpu The disassembler state for figuring out the write size.
302 * This need not be specified if the caller knows we won't do cross entry accesses.
303 */
304#ifdef IN_RING3
305void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
306#else
307void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
308#endif
309{
310 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
311 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
312 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
313
314 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
315
316 for (;;)
317 {
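                /* The same shadow page mapping is viewed through a union so the switch below can interpret the write according to pPage->enmKind. */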
318 union
319 {
320 void *pv;
321 PX86PT pPT;
322 PX86PTPAE pPTPae;
323 PX86PD pPD;
324 PX86PDPAE pPDPae;
325 PX86PDPT pPDPT;
326 PX86PML4 pPML4;
327 } uShw;
328
329 switch (pPage->enmKind)
330 {
331 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
332 {
333 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
334 const unsigned iShw = off / sizeof(X86PTE);
335 if (uShw.pPT->a[iShw].n.u1Present)
336 {
337# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
338 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
339 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
340 pgmPoolTracDerefGCPhysHint(pPool, pPage,
341 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
342 pGstPte->u & X86_PTE_PG_MASK);
343# endif
344 uShw.pPT->a[iShw].u = 0;
345 }
346 break;
347 }
348
349 /* page/2 sized */
350 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
351 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
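                /* Two PAE shadow PTs cover one 32-bit guest PT page; only act when the write falls in the half this shadow page covers. */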
352 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
353 {
354 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
355 if (uShw.pPTPae->a[iShw].n.u1Present)
356 {
357# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
358 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
359 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
360 pgmPoolTracDerefGCPhysHint(pPool, pPage,
361 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
362 pGstPte->u & X86_PTE_PG_MASK);
363# endif
364 uShw.pPTPae->a[iShw].u = 0;
365 }
366 }
367 break;
368
369 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
370 {
371 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
372 const unsigned iShw = off / sizeof(X86PTEPAE);
373 if (uShw.pPTPae->a[iShw].n.u1Present)
374 {
375# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
376 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
377 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
378 pgmPoolTracDerefGCPhysHint(pPool, pPage,
379 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
380 pGstPte->u & X86_PTE_PAE_PG_MASK);
381# endif
382 uShw.pPTPae->a[iShw].u = 0;
383 }
384
385 /* paranoia / a bit assumptive. */
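                /* A misaligned write may straddle two PTEs; if so, clear the second entry as well. */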
386 if ( pCpu
387 && (off & 7)
388 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
389 {
390 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
391 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
392
393 if (uShw.pPTPae->a[iShw2].n.u1Present)
394 {
395# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
396 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
397 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
398 pgmPoolTracDerefGCPhysHint(pPool, pPage,
399 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
400 pGstPte->u & X86_PTE_PAE_PG_MASK);
401# endif
402 uShw.pPTPae->a[iShw2].u = 0;
403 }
404 }
405
406 break;
407 }
408
409# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
410 case PGMPOOLKIND_32BIT_PD:
411# else
412 case PGMPOOLKIND_ROOT_32BIT_PD:
413# endif
414 {
415 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
416 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
417 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
418 {
419 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
420 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
421 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
422 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
423 }
424 /* paranoia / a bit assumptive. */
425 else if ( pCpu
426 && (off & 3)
427 && (off & 3) + cbWrite > sizeof(X86PTE))
428 {
429 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
430 if ( iShw2 != iShw
431 && iShw2 < RT_ELEMENTS(uShw.pPD->a)
432 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
433 {
434 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
435 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
436 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
437 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
438 }
439 }
440#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
441 if ( uShw.pPD->a[iShw].n.u1Present
442 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
443 {
444 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
445# ifdef IN_RC /* TLB load - we're pushing things a bit... */
446 ASMProbeReadByte(pvAddress);
447# endif
448 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
449 uShw.pPD->a[iShw].u = 0;
450 }
451#endif
452 break;
453 }
454
455# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
456 case PGMPOOLKIND_ROOT_PAE_PD:
457 {
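                /* Each 32-bit guest PDE (4MB) corresponds to two PAE PDEs; iShwPdpt selects one of the four shadow PAE PDs, iShw the first of its two entries. */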
458 unsigned iGst = off / sizeof(X86PDE); // ASSUMING 32-bit guest paging!
459 unsigned iShwPdpt = iGst / 256;
460 unsigned iShw = (iGst % 256) * 2;
461 Assert(pPage->idx == PGMPOOL_IDX_PAE_PD);
462 PPGMPOOLPAGE pPage2 = pPage + 1 + iShwPdpt;
463 Assert(pPage2->idx == PGMPOOL_IDX_PAE_PD_0 + iShwPdpt);
464 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage2);
465 for (unsigned i = 0; i < 2; i++, iShw++)
466 {
467 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
468 {
469 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
470 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
471 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw));
472 }
473 /* paranoia / a bit assumptive. */
474 else if ( pCpu
475 && (off & 3)
476 && (off & 3) + cbWrite > 4)
477 {
478 const unsigned iShw2 = iShw + 2;
479 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a) /** @todo was completely wrong, it's better now after #1865 but still wrong from cross PD. */
480 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
481 {
482 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
483 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
484 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
485 }
486 }
487#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
488 if ( uShw.pPDPae->a[iShw].n.u1Present
489 && !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
490 {
491 LogFlow(("pgmPoolMonitorChainChanging: iShwPdpt=%#x iShw=%#x: %RX64 -> freeing it!\n", iShwPdpt, iShw, uShw.pPDPae->a[iShw].u));
492# ifdef IN_RC /* TLB load - we're pushing things a bit... */
493 ASMProbeReadByte(pvAddress);
494# endif
495 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + iShwPdpt * X86_PG_PAE_ENTRIES);
496 uShw.pPDPae->a[iShw].u = 0;
497 }
498#endif
499 }
500 break;
501 }
502# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
503
504 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
505 {
506 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
507 const unsigned iShw = off / sizeof(X86PDEPAE);
508 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
509 {
510 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
511 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
512 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
513 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
514 }
515#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
516 /*
517 * Causes trouble when the guest uses a PDE to refer to the whole page table level
518 * structure. (Invalidate here; faults later on when it tries to change the page
519 * table entries -> recheck; probably only applies to the RC case.)
520 */
521 else
522 {
523 if (uShw.pPDPae->a[iShw].n.u1Present)
524 {
525 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
526 pgmPoolFree(pPool->CTX_SUFF(pVM),
527 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
528 /* Note: hardcoded PAE implementation dependency */
529 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
530 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
531 uShw.pPDPae->a[iShw].u = 0;
532 }
533 }
534#endif
535 /* paranoia / a bit assumptive. */
536 if ( pCpu
537 && (off & 7)
538 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
539 {
540 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
541 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
542
543 if ( iShw2 != iShw
544 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
545 {
546 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
547 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
548 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
549 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
550 }
551#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
552 else if (uShw.pPDPae->a[iShw2].n.u1Present)
553 {
554 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
555 pgmPoolFree(pPool->CTX_SUFF(pVM),
556 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
557 /* Note: hardcoded PAE implementation dependency */
558 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
559 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
560 uShw.pPDPae->a[iShw2].u = 0;
561 }
562#endif
563 }
564 break;
565 }
566
567# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
568 case PGMPOOLKIND_PAE_PDPT:
569# else
570 case PGMPOOLKIND_ROOT_PDPT:
571# endif
572 {
573 /*
574 * Hopefully this doesn't happen very often:
575 * - touching unused parts of the page
576 * - messing with the bits of pd pointers without changing the physical address
577 */
578 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
579 const unsigned iShw = off / sizeof(X86PDPE);
580 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
581 {
582 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
583 {
584 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
585 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
586 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
587 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
588 }
589 /* paranoia / a bit assumptive. */
590 else if ( pCpu
591 && (off & 7)
592 && (off & 7) + cbWrite > sizeof(X86PDPE))
593 {
594 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
595 if ( iShw2 != iShw
596 && iShw2 < X86_PG_PAE_PDPE_ENTRIES
597 && uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
598 {
599 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
600 STAM_COUNTER_INC(&(pPool->CTX_SUFF(pVM)->pgm.s.StatRZGuestCR3WriteConflict));
601 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
602 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
603 }
604 }
605 }
606 break;
607 }
608
609#ifndef IN_RC
610 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
611 {
612 Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);
613
614 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
615 const unsigned iShw = off / sizeof(X86PDEPAE);
616 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
617 {
618 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
619 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
620 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
621 }
622 else
623 {
624 if (uShw.pPDPae->a[iShw].n.u1Present)
625 {
626 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
627 pgmPoolFree(pPool->CTX_SUFF(pVM),
628 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
629 pPage->idx,
630 iShw);
631 uShw.pPDPae->a[iShw].u = 0;
632 }
633 }
634 /* paranoia / a bit assumptive. */
635 if ( pCpu
636 && (off & 7)
637 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
638 {
639 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
640 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
641
642 if ( iShw2 != iShw
643 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
644 {
645 Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
646 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
647 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
648 }
649 else
650 if (uShw.pPDPae->a[iShw2].n.u1Present)
651 {
652 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
653 pgmPoolFree(pPool->CTX_SUFF(pVM),
654 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
655 pPage->idx,
656 iShw2);
657 uShw.pPDPae->a[iShw2].u = 0;
658 }
659 }
660 break;
661 }
662
663 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
664 {
665 /*
666 * Hopefully this doesn't happen very often:
667 * - messing with the bits of pd pointers without changing the physical address
668 */
669 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
670 {
671 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
672 const unsigned iShw = off / sizeof(X86PDPE);
673 if (uShw.pPDPT->a[iShw].n.u1Present)
674 {
675 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
676 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
677 uShw.pPDPT->a[iShw].u = 0;
678 }
679 /* paranoia / a bit assumptive. */
680 if ( pCpu
681 && (off & 7)
682 && (off & 7) + cbWrite > sizeof(X86PDPE))
683 {
684 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
685 if (uShw.pPDPT->a[iShw2].n.u1Present)
686 {
687 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
688 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
689 uShw.pPDPT->a[iShw2].u = 0;
690 }
691 }
692 }
693 break;
694 }
695
696 case PGMPOOLKIND_64BIT_PML4:
697 {
698 /*
699 * Hopefully this doesn't happen very often:
700 * - messing with the bits of pd pointers without changing the physical address
701 */
702 if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
703 {
704 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
705 const unsigned iShw = off / sizeof(X86PDPE);
706 if (uShw.pPML4->a[iShw].n.u1Present)
707 {
708 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
709 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
710 uShw.pPML4->a[iShw].u = 0;
711 }
712 /* paranoia / a bit assumptive. */
713 if ( pCpu
714 && (off & 7)
715 && (off & 7) + cbWrite > sizeof(X86PDPE))
716 {
717 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
718 if (uShw.pPML4->a[iShw2].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
721 pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
722 uShw.pPML4->a[iShw2].u = 0;
723 }
724 }
725 }
726 break;
727 }
728#endif /* !IN_RC */
729
730 default:
731 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
732 }
733
734 /* next */
735 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
736 return;
737 pPage = &pPool->aPages[pPage->iMonitoredNext];
738 }
739}
740
741
742# ifndef IN_RING3
743/**
744 * Checks if an access could be a fork operation in progress.
745 *
746 * Meaning that the guest is setting up the parent process for Copy-On-Write.
747 *
748 * @returns true if it's likely that we're forking, otherwise false.
749 * @param pPool The pool.
750 * @param pCpu The disassembled instruction.
751 * @param offFault The access offset.
752 */
753DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
754{
755 /*
756 * i386 linux is using btr to clear X86_PTE_RW.
757 * The functions involved are (2.6.16 source inspection):
758 * clear_bit
759 * ptep_set_wrprotect
760 * copy_one_pte
761 * copy_pte_range
762 * copy_pmd_range
763 * copy_pud_range
764 * copy_page_range
765 * dup_mmap
766 * dup_mm
767 * copy_mm
768 * copy_process
769 * do_fork
770 */
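    /* Heuristic only: look for a btr targeting the low dword of the entry; the bit index is not validated (see the @todo below). */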
771 if ( pCpu->pCurInstr->opcode == OP_BTR
772 && !(offFault & 4)
773 /** @todo Validate that the bit index is X86_PTE_RW. */
774 )
775 {
776 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
777 return true;
778 }
779 return false;
780}
781
782
783/**
784 * Determine whether the page is likely to have been reused.
785 *
786 * @returns true if we consider the page as being reused for a different purpose.
787 * @returns false if we consider it to still be a paging page.
788 * @param pVM VM Handle.
789 * @param pPage The page in question.
790 * @param pRegFrame Trap register frame.
791 * @param pCpu The disassembly info for the faulting instruction.
792 * @param pvFault The fault address.
793 *
794 * @remark The REP prefix check is left to the caller because of STOSD/W.
795 */
796DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
797{
798#ifndef IN_RC
799 /** @todo could make this general, faulting close to rsp should be safe reuse heuristic. */
800 if ( HWACCMHasPendingIrq(pVM)
801 && (pRegFrame->rsp - pvFault) < 32)
802 {
803 /* Fault caused by stack writes while trying to inject an interrupt event. */
804 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
805 return true;
806 }
807#else
808 NOREF(pVM); NOREF(pvFault);
809#endif
810
811 switch (pCpu->pCurInstr->opcode)
812 {
813 /* call implies the actual push of the return address faulted */
814 case OP_CALL:
815 Log4(("pgmPoolMonitorIsReused: CALL\n"));
816 return true;
817 case OP_PUSH:
818 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
819 return true;
820 case OP_PUSHF:
821 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
822 return true;
823 case OP_PUSHA:
824 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
825 return true;
826 case OP_FXSAVE:
827 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
828 return true;
829 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
830 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
831 return true;
832 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
833 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
834 return true;
835 case OP_MOVSWD:
836 case OP_STOSWD:
837 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
838 && pRegFrame->rcx >= 0x40
839 )
840 {
841 Assert(pCpu->mode == CPUMODE_64BIT);
842
843 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
844 return true;
845 }
846 return false;
847 }
848 if ( (pCpu->param1.flags & USE_REG_GEN32)
849 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
850 {
851 Log4(("pgmPoolMonitorIsReused: ESP\n"));
852 return true;
853 }
854
855 //if (pPage->fCR3Mix)
856 // return false;
857 return false;
858}
859
860
861/**
862 * Flushes the page being accessed.
863 *
864 * @returns VBox status code suitable for scheduling.
865 * @param pVM The VM handle.
866 * @param pPool The pool.
867 * @param pPage The pool page (head).
868 * @param pCpu The disassembly of the write instruction.
869 * @param pRegFrame The trap register frame.
870 * @param GCPhysFault The fault address as guest physical address.
871 * @param pvFault The fault address.
872 */
873static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
874 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
875{
876 /*
877 * First, do the flushing.
878 */
879 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
880
881 /*
882 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
883 */
884 uint32_t cbWritten;
885 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
886 if (RT_SUCCESS(rc2))
887 pRegFrame->rip += pCpu->opsize;
888 else if (rc2 == VERR_EM_INTERPRETER)
889 {
890#ifdef IN_RC
891 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
892 {
893 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
894 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
895 rc = VINF_SUCCESS;
896 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
897 }
898 else
899#endif
900 {
901 rc = VINF_EM_RAW_EMULATE_INSTR;
902 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
903 }
904 }
905 else
906 rc = rc2;
907
908 /* See use in pgmPoolAccessHandlerSimple(). */
909 PGM_INVL_GUEST_TLBS();
910
911 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
912 return rc;
913
914}
915
916
917/**
918 * Handles the STOSD write accesses.
919 *
920 * @returns VBox status code suitable for scheduling.
921 * @param pVM The VM handle.
922 * @param pPool The pool.
923 * @param pPage The pool page (head).
924 * @param pCpu The disassembly of the write instruction.
925 * @param pRegFrame The trap register frame.
926 * @param GCPhysFault The fault address as guest physical address.
927 * @param pvFault The fault address.
928 */
929DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
930 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
931{
932 Assert(pCpu->mode == CPUMODE_32BIT);
933
934 /*
935 * Increment the modification counter and insert it into the list
936 * of modified pages the first time.
937 */
938 if (!pPage->cModifications++)
939 pgmPoolMonitorModifiedInsert(pPool, pPage);
940
941 /*
942 * Execute REP STOSD.
943 *
944 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
945 * write situation, meaning that it's safe to write here.
946 */
947#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
948 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
949#endif
950 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
951 while (pRegFrame->ecx)
952 {
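        /* Notify the monitor chain before performing the 4-byte guest write (done directly in RC, via PGMPhysSimpleWriteGCPhys elsewhere). */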
953#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
954 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
955 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
956 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
957#else
958 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
959#endif
960#ifdef IN_RC
961 *(uint32_t *)pu32 = pRegFrame->eax;
962#else
963 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
964#endif
965 pu32 += 4;
966 GCPhysFault += 4;
967 pRegFrame->edi += 4;
968 pRegFrame->ecx--;
969 }
970 pRegFrame->rip += pCpu->opsize;
971
972 /* See use in pgmPoolAccessHandlerSimple(). */
973 PGM_INVL_GUEST_TLBS();
974
975 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
976 return VINF_SUCCESS;
977}
978
979
980/**
981 * Handles the simple write accesses.
982 *
983 * @returns VBox status code suitable for scheduling.
984 * @param pVM The VM handle.
985 * @param pPool The pool.
986 * @param pPage The pool page (head).
987 * @param pCpu The disassembly of the write instruction.
988 * @param pRegFrame The trap register frame.
989 * @param GCPhysFault The fault address as guest physical address.
990 * @param pvFault The fault address.
991 */
992DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
993 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
994{
995 /*
996 * Increment the modification counter and insert it into the list
997 * of modified pages the first time.
998 */
999 if (!pPage->cModifications++)
1000 pgmPoolMonitorModifiedInsert(pPool, pPage);
1001
1002 /*
1003 * Clear all the pages. ASSUMES that pvFault is readable.
1004 */
1005#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1006 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1007 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1008 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1009 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1010#else
1011 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
1012#endif
1013
1014 /*
1015 * Interpret the instruction.
1016 */
1017 uint32_t cb;
1018 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
1019 if (RT_SUCCESS(rc))
1020 pRegFrame->rip += pCpu->opsize;
1021 else if (rc == VERR_EM_INTERPRETER)
1022 {
1023 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1024 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
1025 rc = VINF_EM_RAW_EMULATE_INSTR;
1026 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1027 }
1028
1029 /*
1030 * Quick hack, with logging enabled we're getting stale
1031 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1032 * Flushing here is BAD and expensive; I think EMInterpretDisasOne will
1033 * have to be fixed to support this. But that'll have to wait till next week.
1034 *
1035 * An alternative is to keep track of the changed PTEs together with the
1036 * GCPhys from the guest PT. This may prove expensive though.
1037 *
1038 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1039 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1040 */
1041 PGM_INVL_GUEST_TLBS();
1042
1043 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1044 return rc;
1045}
1046
1047
1048/**
1049 * \#PF Handler callback for PT write accesses.
1050 *
1051 * @returns VBox status code (appropriate for GC return).
1052 * @param pVM VM Handle.
1053 * @param uErrorCode CPU Error code.
1054 * @param pRegFrame Trap register frame.
1055 * NULL on DMA and other non CPU access.
1056 * @param pvFault The fault address (cr2).
1057 * @param GCPhysFault The GC physical address corresponding to pvFault.
1058 * @param pvUser User argument.
1059 */
1060DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1061{
1062 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1063 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1064 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1065 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1066
1067 /*
1068 * We should ALWAYS have the list head as user parameter. This
1069 * is because we use that page to record the changes.
1070 */
1071 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1072
1073 /*
1074 * Disassemble the faulting instruction.
1075 */
1076 DISCPUSTATE Cpu;
1077 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
1078 AssertRCReturn(rc, rc);
1079
1080 /*
1081 * Check if it's worth dealing with.
1082 */
1083 bool fReused = false;
1084 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1085 || pPage->fCR3Mix)
1086 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
1087 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
1088 {
1089 /*
1090 * Simple instructions, no REP prefix.
1091 */
1092 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
1093 {
1094 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1095 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1096 return rc;
1097 }
1098
1099 /*
1100 * Windows is frequently doing small memset() operations (netio test 4k+).
1101 * We have to deal with these or we'll kill the cache and performance.
1102 */
1103 if ( Cpu.pCurInstr->opcode == OP_STOSWD
1104 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
1105 && pRegFrame->ecx <= 0x20
1106 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1107 && !((uintptr_t)pvFault & 3)
1108 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1109 && Cpu.mode == CPUMODE_32BIT
1110 && Cpu.opmode == CPUMODE_32BIT
1111 && Cpu.addrmode == CPUMODE_32BIT
1112 && Cpu.prefix == PREFIX_REP
1113 && !pRegFrame->eflags.Bits.u1DF
1114 )
1115 {
1116 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1117 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1118 return rc;
1119 }
1120
1121 /* REP prefix, don't bother. */
1122 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1123 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1124 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1125 }
1126
1127 /*
1128 * Not worth it, so flush it.
1129 *
1130 * If we considered it to be reused, don't go back to ring-3
1131 * to emulate failed instructions since we usually cannot
1132 * interpret them. This may be a bit risky, in which case
1133 * the reuse detection must be fixed.
1134 */
1135 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1136 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1137 rc = VINF_SUCCESS;
1138 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1139 return rc;
1140}
1141
1142# endif /* !IN_RING3 */
1143#endif /* PGMPOOL_WITH_MONITORING */
1144
1145#ifdef PGMPOOL_WITH_CACHE
1146
1147/**
1148 * Inserts a page into the GCPhys hash table.
1149 *
1150 * @param pPool The pool.
1151 * @param pPage The page.
1152 */
1153DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1154{
1155 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1156 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1157 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1158 pPage->iNext = pPool->aiHash[iHash];
1159 pPool->aiHash[iHash] = pPage->idx;
1160}
1161
1162
1163/**
1164 * Removes a page from the GCPhys hash table.
1165 *
1166 * @param pPool The pool.
1167 * @param pPage The page.
1168 */
1169DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1170{
1171 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1172 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1173 if (pPool->aiHash[iHash] == pPage->idx)
1174 pPool->aiHash[iHash] = pPage->iNext;
1175 else
1176 {
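        /* Not the chain head: walk the hash chain to find the predecessor and unlink pPage. */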
1177 uint16_t iPrev = pPool->aiHash[iHash];
1178 for (;;)
1179 {
1180 const int16_t i = pPool->aPages[iPrev].iNext;
1181 if (i == pPage->idx)
1182 {
1183 pPool->aPages[iPrev].iNext = pPage->iNext;
1184 break;
1185 }
1186 if (i == NIL_PGMPOOL_IDX)
1187 {
1188 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1189 break;
1190 }
1191 iPrev = i;
1192 }
1193 }
1194 pPage->iNext = NIL_PGMPOOL_IDX;
1195}
1196
1197
1198/**
1199 * Frees up one cache page.
1200 *
1201 * @returns VBox status code.
1202 * @retval VINF_SUCCESS on success.
1203 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a lightweight pool flush.
1204 * @param pPool The pool.
1205 * @param iUser The user index.
1206 */
1207static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1208{
1209#ifndef IN_RC
1210 const PVM pVM = pPool->CTX_SUFF(pVM);
1211#endif
1212 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1213 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1214
1215 /*
1216 * Select one page from the tail of the age list.
1217 */
1218 uint16_t iToFree = pPool->iAgeTail;
1219 if (iToFree == iUser)
1220 iToFree = pPool->aPages[iToFree].iAgePrev;
1221/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1222 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1223 {
1224 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1225 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1226 {
1227 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1228 continue;
1229 iToFree = i;
1230 break;
1231 }
1232 }
1233*/
1234
1235 Assert(iToFree != iUser);
1236 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1237
1238 PPGMPOOLPAGE pPage = &pPool->aPages[iToFree];
1239
1240 /*
1241 * Reject any attempts at flushing the currently active shadow CR3 mapping
1242 */
1243 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
1244 {
1245 /* Refresh the cr3 mapping by putting it at the head of the age list. */
1246 pgmPoolCacheUsed(pPool, pPage);
1247 return pgmPoolCacheFreeOne(pPool, iUser);
1248 }
1249
1250 int rc = pgmPoolFlushPage(pPool, pPage);
1251 if (rc == VINF_SUCCESS)
1252 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1253 return rc;
1254}
1255
1256
1257/**
1258 * Checks if a kind mismatch is really a page being reused
1259 * or if it's just a normal remapping.
1260 *
1261 * @returns true if reused and the cached page (enmKind1) should be flushed
1262 * @returns false if not reused.
1263 * @param enmKind1 The kind of the cached page.
1264 * @param enmKind2 The kind of the requested page.
1265 */
1266static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1267{
1268 switch (enmKind1)
1269 {
1270 /*
1271 * Never reuse them. There is no remapping in non-paging mode.
1272 */
1273 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1274 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1275 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1276 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1277 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1278 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1279 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1280 case PGMPOOLKIND_PAE_PD_PHYS:
1281 case PGMPOOLKIND_PAE_PDPT_PHYS:
1282 case PGMPOOLKIND_32BIT_PD_PHYS:
1283 return true;
1284
1285 /*
1286 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1287 */
1288 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1289 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1290 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1291 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1292 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1293 switch (enmKind2)
1294 {
1295 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1296 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1297 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1298 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1299 case PGMPOOLKIND_64BIT_PML4:
1300 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1301 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1302 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1303 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1304 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1305 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1306 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1307 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1308 return true;
1309 default:
1310 return false;
1311 }
1312
1313 /*
1314 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1315 */
1316 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1317 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1318 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1319 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1320 case PGMPOOLKIND_64BIT_PML4:
1321 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1322 switch (enmKind2)
1323 {
1324 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1325 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1326 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1327 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1328 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1329 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1330 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1331 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1332 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1333 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1334 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1335 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1336 return true;
1337 default:
1338 return false;
1339 }
1340
1341 /*
1342 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1343 */
1344#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1345 case PGMPOOLKIND_ROOT_32BIT_PD:
1346 case PGMPOOLKIND_ROOT_PAE_PD:
1347 case PGMPOOLKIND_ROOT_PDPT:
1348#endif
1349 case PGMPOOLKIND_ROOT_NESTED:
1350 return false;
1351
1352 default:
1353 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1354 }
1355}
1356
1357
1358/**
1359 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1360 *
1361 * @returns VBox status code.
1362 * @retval VINF_PGM_CACHED_PAGE on success.
1363 * @retval VERR_FILE_NOT_FOUND if not found.
1364 * @param pPool The pool.
1365 * @param GCPhys The GC physical address of the page we're going to shadow.
1366 * @param enmKind The kind of mapping.
1367 * @param iUser The shadow page pool index of the user table.
1368 * @param iUserTable The index into the user table (shadowed).
1369 * @param ppPage Where to store the pointer to the page.
1370 */
1371static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1372{
1373#ifndef IN_RC
1374 const PVM pVM = pPool->CTX_SUFF(pVM);
1375#endif
1376 /*
1377 * Look up the GCPhys in the hash.
1378 */
1379 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1380 Log3(("pgmPoolCacheAlloc: %RGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
1381 if (i != NIL_PGMPOOL_IDX)
1382 {
1383 do
1384 {
1385 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1386 Log3(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
1387 if (pPage->GCPhys == GCPhys)
1388 {
1389 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1390 {
1391 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1392 if (RT_SUCCESS(rc))
1393 {
1394 *ppPage = pPage;
1395 STAM_COUNTER_INC(&pPool->StatCacheHits);
1396 return VINF_PGM_CACHED_PAGE;
1397 }
1398 return rc;
1399 }
1400
1401 /*
1402 * The kind is different. In some cases we should now flush the page
1403 * as it has been reused, but in most cases this is normal remapping
1404 * of PDs as PT or big pages using the GCPhys field in a slightly
1405 * different way than the other kinds.
1406 */
1407 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1408 {
1409 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1410 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1411 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1412 break;
1413 }
1414 }
1415
1416 /* next */
1417 i = pPage->iNext;
1418 } while (i != NIL_PGMPOOL_IDX);
1419 }
1420
1421 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
1422 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1423 return VERR_FILE_NOT_FOUND;
1424}
1425
1426
1427/**
1428 * Inserts a page into the cache.
1429 *
1430 * @param pPool The pool.
1431 * @param pPage The cached page.
1432 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1433 */
1434static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1435{
1436 /*
1437 * Insert into the GCPhys hash if the page is fit for that.
1438 */
1439 Assert(!pPage->fCached);
1440 if (fCanBeCached)
1441 {
1442 pPage->fCached = true;
1443 pgmPoolHashInsert(pPool, pPage);
1444 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1445 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1446 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1447 }
1448 else
1449 {
1450 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1451 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1452 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1453 }
1454
1455 /*
1456 * Insert at the head of the age list.
1457 */
1458 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1459 pPage->iAgeNext = pPool->iAgeHead;
1460 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1461 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1462 else
1463 pPool->iAgeTail = pPage->idx;
1464 pPool->iAgeHead = pPage->idx;
1465}
1466
1467
1468/**
1469 * Flushes a cached page.
1470 *
1471 * @param pPool The pool.
1472 * @param pPage The cached page.
1473 */
1474static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1475{
1476 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
1477
1478 /*
1479 * Remove the page from the hash.
1480 */
1481 if (pPage->fCached)
1482 {
1483 pPage->fCached = false;
1484 pgmPoolHashRemove(pPool, pPage);
1485 }
1486 else
1487 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1488
1489 /*
1490 * Remove it from the age list.
1491 */
1492 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1493 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1494 else
1495 pPool->iAgeTail = pPage->iAgePrev;
1496 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1497 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1498 else
1499 pPool->iAgeHead = pPage->iAgeNext;
1500 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1501 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1502}
1503
1504#endif /* PGMPOOL_WITH_CACHE */
1505#ifdef PGMPOOL_WITH_MONITORING
1506
1507/**
1508 * Looks for pages sharing the monitor.
1509 *
1510 * @returns Pointer to the head page.
1511 * @returns NULL if not found.
1512 * @param pPool The Pool
1513 * @param pNewPage The page which is going to be monitored.
1514 */
1515static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1516{
1517#ifdef PGMPOOL_WITH_CACHE
1518 /*
1519 * Look up the GCPhys in the hash.
1520 */
1521 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1522 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1523 if (i == NIL_PGMPOOL_IDX)
1524 return NULL;
1525 do
1526 {
1527 PPGMPOOLPAGE pPage = &pPool->aPages[i];
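        /* Unsigned compare: true when pPage->GCPhys lies within the same guest page as pNewPage. */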
1528 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1529 && pPage != pNewPage)
1530 {
1531 switch (pPage->enmKind)
1532 {
1533 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1534 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1535 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1536 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1537 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1538 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1539 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1540 case PGMPOOLKIND_64BIT_PML4:
1541#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1542 case PGMPOOLKIND_32BIT_PD:
1543 case PGMPOOLKIND_PAE_PDPT:
1544#else
1545 case PGMPOOLKIND_ROOT_32BIT_PD:
1546 case PGMPOOLKIND_ROOT_PAE_PD:
1547 case PGMPOOLKIND_ROOT_PDPT:
1548#endif
1549 {
1550 /* find the head */
1551 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1552 {
1553 Assert(pPage->iMonitoredPrev != pPage->idx);
1554 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1555 }
1556 return pPage;
1557 }
1558
1559 /* ignore, no monitoring. */
1560 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1561 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1562 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1563 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1564 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1565 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1566 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1567 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1568 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1569 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1570 case PGMPOOLKIND_ROOT_NESTED:
1571 case PGMPOOLKIND_PAE_PD_PHYS:
1572 case PGMPOOLKIND_PAE_PDPT_PHYS:
1573 case PGMPOOLKIND_32BIT_PD_PHYS:
1574 break;
1575 default:
1576 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1577 }
1578 }
1579
1580 /* next */
1581 i = pPage->iNext;
1582 } while (i != NIL_PGMPOOL_IDX);
1583#endif
1584 return NULL;
1585}
1586
1587
1588/**
1589 * Enables write monitoring of a guest page.
1590 *
1591 * @returns VBox status code.
1592 * @retval VINF_SUCCESS on success.
1593 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a lightweight pool flush.
1594 * @param pPool The pool.
1595 * @param pPage The cached page.
1596 */
1597static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1598{
1599 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1600
1601 /*
1602 * Filter out the relevant kinds.
1603 */
1604 switch (pPage->enmKind)
1605 {
1606 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1607 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1608 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1609 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1610 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1611 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1612 case PGMPOOLKIND_64BIT_PML4:
1613#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1614 case PGMPOOLKIND_32BIT_PD:
1615 case PGMPOOLKIND_PAE_PDPT:
1616#else
1617 case PGMPOOLKIND_ROOT_PDPT:
1618#endif
1619 break;
1620
1621 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1622 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1623 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1624 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1625 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1626 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1627 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1628 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1629 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1630 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1631 case PGMPOOLKIND_ROOT_NESTED:
1632 /* Nothing to monitor here. */
1633 return VINF_SUCCESS;
1634
1635#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1636 case PGMPOOLKIND_32BIT_PD_PHYS:
1637 case PGMPOOLKIND_PAE_PDPT_PHYS:
1638 case PGMPOOLKIND_PAE_PD_PHYS:
1639 /* Nothing to monitor here. */
1640 return VINF_SUCCESS;
1641#else
1642 case PGMPOOLKIND_ROOT_32BIT_PD:
1643 case PGMPOOLKIND_ROOT_PAE_PD:
1644#endif
1645#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1646 break;
1647#endif
1648 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1649 default:
1650 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1651 }
1652
1653 /*
1654 * Install handler.
1655 */
1656 int rc;
1657 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1658 if (pPageHead)
1659 {
1660 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1661 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1662 pPage->iMonitoredPrev = pPageHead->idx;
1663 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1664 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1665 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1666 pPageHead->iMonitoredNext = pPage->idx;
1667 rc = VINF_SUCCESS;
1668 }
1669 else
1670 {
1671 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1672 PVM pVM = pPool->CTX_SUFF(pVM);
1673 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1674 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1675 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1676 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1677 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1678 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
1679 pPool->pszAccessHandler);
1680 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1681 * the heap size should suffice. */
1682 AssertFatalRC(rc);
1683 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1684 rc = VERR_PGM_POOL_CLEARED;
1685 }
1686 pPage->fMonitored = true;
1687 return rc;
1688}
1689
1690
1691/**
1692 * Disables write monitoring of a guest page.
1693 *
1694 * @returns VBox status code.
1695 * @retval VINF_SUCCESS on success.
1696 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
1697 * @param pPool The pool.
1698 * @param pPage The cached page.
1699 */
1700static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1701{
1702 /*
1703 * Filter out the relevant kinds.
1704 */
1705 switch (pPage->enmKind)
1706 {
1707 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1708 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1709 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1710 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1711 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1712 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1713 case PGMPOOLKIND_64BIT_PML4:
1714#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1715 case PGMPOOLKIND_32BIT_PD:
1716 case PGMPOOLKIND_PAE_PDPT:
1717#else
1718 case PGMPOOLKIND_ROOT_PDPT:
1719#endif
1720 break;
1721
1722 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1723 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1724 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1725 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1726 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1727 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1728 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1729 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1730 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1731 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1732 case PGMPOOLKIND_ROOT_NESTED:
1733 case PGMPOOLKIND_PAE_PD_PHYS:
1734 case PGMPOOLKIND_PAE_PDPT_PHYS:
1735 case PGMPOOLKIND_32BIT_PD_PHYS:
1736 /* Nothing to monitor here. */
1737 return VINF_SUCCESS;
1738
1739#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
1740 case PGMPOOLKIND_ROOT_32BIT_PD:
1741 case PGMPOOLKIND_ROOT_PAE_PD:
1742#endif
1743#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1744 break;
1745#endif
1746 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1747 default:
1748 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1749 }
1750
1751 /*
1752 * Remove the page from the monitored list, or uninstall the access handler if it is the last one.
1753 */
1754 const PVM pVM = pPool->CTX_SUFF(pVM);
1755 int rc;
1756 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1757 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1758 {
1759 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1760 {
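 /* This page is the head of the monitoring chain: promote the next page to
    head and re-point the physical handler's per-context user data at it. */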
1761 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1762 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1763 pNewHead->fCR3Mix = pPage->fCR3Mix;
1764 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1765 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1766 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1767 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
1768 pPool->pszAccessHandler);
1769 AssertFatalRCSuccess(rc);
1770 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1771 }
1772 else
1773 {
1774 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1775 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1776 {
1777 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1778 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1779 }
1780 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1781 rc = VINF_SUCCESS;
1782 }
1783 }
1784 else
1785 {
1786 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1787 AssertFatalRC(rc);
1788 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1789 rc = VERR_PGM_POOL_CLEARED;
1790 }
1791 pPage->fMonitored = false;
1792
1793 /*
1794 * Remove it from the list of modified pages (if in it).
1795 */
1796 pgmPoolMonitorModifiedRemove(pPool, pPage);
1797
1798 return rc;
1799}
1800
1801# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1802
1803/**
1804 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1805 *
1806 * @param pPool The Pool.
1807 * @param pPage A page in the chain.
1808 * @param fCR3Mix The new fCR3Mix value.
1809 */
1810static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1811{
1812 /* current */
1813 pPage->fCR3Mix = fCR3Mix;
1814
1815 /* before */
1816 int16_t idx = pPage->iMonitoredPrev;
1817 while (idx != NIL_PGMPOOL_IDX)
1818 {
1819 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1820 idx = pPool->aPages[idx].iMonitoredPrev;
1821 }
1822
1823 /* after */
1824 idx = pPage->iMonitoredNext;
1825 while (idx != NIL_PGMPOOL_IDX)
1826 {
1827 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1828 idx = pPool->aPages[idx].iMonitoredNext;
1829 }
1830}
1831
1832
1833/**
1834 * Installs or modifies monitoring of a CR3 page (special).
1835 *
1836 * We're pretending the CR3 page is shadowed by the pool so we can use the
1837 * generic mechanisms for detecting chained monitoring. (This also gives us a
1838 * taste of what code changes are required to really pool CR3 shadow pages.)
1839 *
1840 * @returns VBox status code.
1841 * @param pPool The pool.
1842 * @param idxRoot The CR3 (root) page index.
1843 * @param GCPhysCR3 The (new) CR3 value.
1844 */
1845int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1846{
1847 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1848 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1849 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d} GCPhysCR3=%RGp\n",
1850 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1851
1852 /*
1853 * The unlikely case where it already matches.
1854 */
1855 if (pPage->GCPhys == GCPhysCR3)
1856 {
1857 Assert(pPage->fMonitored);
1858 return VINF_SUCCESS;
1859 }
1860
1861 /*
1862 * Flush the current monitoring and remove it from the hash.
1863 */
1864 int rc = VINF_SUCCESS;
1865 if (pPage->fMonitored)
1866 {
1867 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1868 rc = pgmPoolMonitorFlush(pPool, pPage);
1869 if (rc == VERR_PGM_POOL_CLEARED)
1870 rc = VINF_SUCCESS;
1871 else
1872 AssertFatalRC(rc);
1873 pgmPoolHashRemove(pPool, pPage);
1874 }
1875
1876 /*
1877 * Monitor the page at the new location and insert it into the hash.
1878 */
1879 pPage->GCPhys = GCPhysCR3;
1880 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1881 if (rc2 != VERR_PGM_POOL_CLEARED)
1882 {
1883 AssertFatalRC(rc2);
1884 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1885 rc = rc2;
1886 }
1887 pgmPoolHashInsert(pPool, pPage);
1888 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1889 return rc;
1890}
1891
1892
1893/**
1894 * Removes the monitoring of a CR3 page (special).
1895 *
1896 * @returns VBox status code.
1897 * @param pPool The pool.
1898 * @param idxRoot The CR3 (root) page index.
1899 */
1900int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1901{
1902 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1903 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1904 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%RGp, .fMonitored=%d}\n",
1905 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1906
1907 if (!pPage->fMonitored)
1908 return VINF_SUCCESS;
1909
1910 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1911 int rc = pgmPoolMonitorFlush(pPool, pPage);
1912 if (rc != VERR_PGM_POOL_CLEARED)
1913 AssertFatalRC(rc);
1914 else
1915 rc = VINF_SUCCESS;
1916 pgmPoolHashRemove(pPool, pPage);
1917 Assert(!pPage->fMonitored);
1918 pPage->GCPhys = NIL_RTGCPHYS;
1919 return rc;
1920}
1921
1922# endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1923
1924/**
1925 * Inserts the page into the list of modified pages.
1926 *
1927 * @param pPool The pool.
1928 * @param pPage The page.
1929 */
1930void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1931{
1932 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1933 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1934 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1935 && pPool->iModifiedHead != pPage->idx,
1936 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1937 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1938 pPool->iModifiedHead, pPool->cModifiedPages));
1939
1940 pPage->iModifiedNext = pPool->iModifiedHead;
1941 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1942 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1943 pPool->iModifiedHead = pPage->idx;
1944 pPool->cModifiedPages++;
1945#ifdef VBOX_WITH_STATISTICS
1946 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1947 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1948#endif
1949}
1950
1951
1952/**
1953 * Removes the page from the list of modified pages and resets the
1954 * modification counter.
1955 *
1956 * @param pPool The pool.
1957 * @param pPage The page which is believed to be in the list of modified pages.
1958 */
1959static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1960{
1961 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1962 if (pPool->iModifiedHead == pPage->idx)
1963 {
1964 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1965 pPool->iModifiedHead = pPage->iModifiedNext;
1966 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1967 {
1968 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1969 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1970 }
1971 pPool->cModifiedPages--;
1972 }
1973 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1974 {
1975 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1976 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1977 {
1978 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1979 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1980 }
1981 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1982 pPool->cModifiedPages--;
1983 }
1984 else
1985 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1986 pPage->cModifications = 0;
1987}
1988
1989
1990/**
1991 * Zaps the list of modified pages, resetting their modification counters in the process.
1992 *
1993 * @param pVM The VM handle.
1994 */
1995void pgmPoolMonitorModifiedClearAll(PVM pVM)
1996{
1997 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1998 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1999
2000 unsigned cPages = 0; NOREF(cPages);
2001 uint16_t idx = pPool->iModifiedHead;
2002 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2003 while (idx != NIL_PGMPOOL_IDX)
2004 {
2005 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2006 idx = pPage->iModifiedNext;
2007 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2008 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2009 pPage->cModifications = 0;
2010 Assert(++cPages);
2011 }
2012 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2013 pPool->cModifiedPages = 0;
2014}
2015
2016
2017#ifdef IN_RING3
2018/**
2019 * Clear all shadow pages and clear all modification counters.
2020 *
2021 * @param pVM The VM handle.
2022 * @remark Should only be used when monitoring is available, thus placed in
2023 * the PGMPOOL_WITH_MONITORING #ifdef.
2024 */
2025void pgmPoolClearAll(PVM pVM)
2026{
2027 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2028 STAM_PROFILE_START(&pPool->StatClearAll, c);
2029 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
2030
2031 /*
2032 * Iterate all the pages until we've encountered all those in use.
2033 * This is a simple but not quite optimal solution.
2034 */
2035 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
2036 unsigned cLeft = pPool->cUsedPages;
2037 unsigned iPage = pPool->cCurPages;
2038 while (--iPage >= PGMPOOL_IDX_FIRST)
2039 {
2040 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2041 if (pPage->GCPhys != NIL_RTGCPHYS)
2042 {
2043 switch (pPage->enmKind)
2044 {
2045 /*
2046 * We only care about shadow page tables.
2047 */
2048 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2049 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2050 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2051 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2052 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2053 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2054 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2055 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2056 {
2057#ifdef PGMPOOL_WITH_USER_TRACKING
2058 if (pPage->cPresent)
2059#endif
2060 {
2061 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
2062 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2063 ASMMemZeroPage(pvShw);
2064 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2065#ifdef PGMPOOL_WITH_USER_TRACKING
2066 pPage->cPresent = 0;
2067 pPage->iFirstPresent = ~0;
2068#endif
2069 }
2070 }
2071 /* fall thru */
2072
2073 default:
2074 Assert(!pPage->cModifications || ++cModifiedPages);
2075 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2076 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2077 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2078 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2079 pPage->cModifications = 0;
2080 break;
2081
2082 }
2083 if (!--cLeft)
2084 break;
2085 }
2086 }
2087
2088 /* sweep the special pages too. */
2089 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
2090 {
2091 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2092 if (pPage->GCPhys != NIL_RTGCPHYS)
2093 {
2094 Assert(!pPage->cModifications || ++cModifiedPages);
2095 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
2096 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
2097 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2098 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2099 pPage->cModifications = 0;
2100 }
2101 }
2102
2103#ifndef DEBUG_michael
2104 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
2105#endif
2106 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2107 pPool->cModifiedPages = 0;
2108
2109#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2110 /*
2111 * Clear all the GCPhys links and rebuild the phys ext free list.
2112 */
2113 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
2114 pRam;
2115 pRam = pRam->CTX_SUFF(pNext))
2116 {
2117 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2118 while (iPage-- > 0)
2119 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2120 }
2121
2122 pPool->iPhysExtFreeHead = 0;
2123 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2124 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2125 for (unsigned i = 0; i < cMaxPhysExts; i++)
2126 {
2127 paPhysExts[i].iNext = i + 1;
2128 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2129 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2130 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2131 }
2132 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2133#endif
2134
2135
2136 pPool->cPresent = 0;
2137 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2138}
2139#endif /* IN_RING3 */
2140
2141
2142/**
2143 * Handle SyncCR3 pool tasks
2144 *
2145 * @returns VBox status code.
2146 * @retval VINF_SUCCESS on success.
2147 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2148 * @param pVM The VM handle.
2149 * @remark Should only be used when monitoring is available, thus placed in
2150 * the PGMPOOL_WITH_MONITORING #ifdef.
2151 */
2152int pgmPoolSyncCR3(PVM pVM)
2153{
2154 /*
2155 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2156 * Occasionally we will have to clear all the shadow page tables because we wanted
2157 * to monitor a page which was mapped by too many shadowed page tables. This operation
2158 * sometimes refered to as a 'lightweight flush'.
2159 */
2160 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2161 pgmPoolMonitorModifiedClearAll(pVM);
2162 else
2163 {
2164# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2165 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2166 pgmPoolClearAll(pVM);
2167# else /* !IN_RING3 */
2168 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2169 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2170 return VINF_PGM_SYNC_CR3;
2171# endif /* !IN_RING3 */
2172 }
2173 return VINF_SUCCESS;
2174}
2175
2176#endif /* PGMPOOL_WITH_MONITORING */
2177#ifdef PGMPOOL_WITH_USER_TRACKING
2178
2179/**
2180 * Frees up at least one user entry.
2181 *
2182 * @returns VBox status code.
2183 * @retval VINF_SUCCESS if a user entry was successfully freed.
2184 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2185 * @param pPool The pool.
2186 * @param iUser The user index.
2187 */
2188static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2189{
2190 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2191#ifdef PGMPOOL_WITH_CACHE
2192 /*
2193 * Just free cached pages in a braindead fashion.
2194 */
2195 /** @todo walk the age list backwards and free the first with usage. */
2196 int rc = VINF_SUCCESS;
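 /* Keep evicting cached pages until freeing them has put at least one user
    node back on the free list. */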
2197 do
2198 {
2199 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2200 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2201 rc = rc2;
2202 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2203 return rc;
2204#else
2205 /*
2206 * Lazy approach.
2207 */
2208 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
2209 Assert(!CPUMIsGuestInLongMode(pVM));
2210 pgmPoolFlushAllInt(pPool);
2211 return VERR_PGM_POOL_FLUSHED;
2212#endif
2213}
2214
2215
2216/**
2217 * Inserts a page into the cache.
2218 *
2219 * This will create a user node for the page, insert it into the GCPhys
2220 * hash, and insert it into the age list.
2221 *
2222 * @returns VBox status code.
2223 * @retval VINF_SUCCESS if successfully added.
2224 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2225 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
2226 * @param pPool The pool.
2227 * @param pPage The cached page.
2228 * @param GCPhys The GC physical address of the page we're going to shadow.
2229 * @param iUser The user index.
2230 * @param iUserTable The user table index.
2231 */
2232DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2233{
2234 int rc = VINF_SUCCESS;
2235 PPGMPOOLUSER pUser = pPool->CTX_SUFF(paUsers);
2236
2237 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2238
2239 /*
2240 * Find a free user node.
2241 */
2242 uint16_t i = pPool->iUserFreeHead;
2243 if (i == NIL_PGMPOOL_USER_INDEX)
2244 {
2245 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2246 if (RT_FAILURE(rc))
2247 return rc;
2248 i = pPool->iUserFreeHead;
2249 }
2250
2251 /*
2252 * Unlink the user node from the free list,
2253 * initialize and insert it into the user list.
2254 */
2255 pPool->iUserFreeHead = pUser[i].iNext;
2256 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2257 pUser[i].iUser = iUser;
2258 pUser[i].iUserTable = iUserTable;
2259 pPage->iUserHead = i;
2260
2261 /*
2262 * Insert into cache and enable monitoring of the guest page if enabled.
2263 *
2264 * Until we implement caching of all levels, including the CR3 one, we'll
2265 * have to make sure we don't try monitor & cache any recursive reuse of
2266 * a monitored CR3 page. Because all Windows versions are doing this we'll
2267 * have to be able to do combined access monitoring, CR3 + PT and
2268 * PD + PT (guest PAE).
2269 *
2270 * Update:
2271 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2272 */
2273#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2274# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2275 const bool fCanBeMonitored = true;
2276# else
2277 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2278 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2279 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2280# endif
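 /* Note: without PGMPOOL_WITH_MIXED_PT_CR3 a page that aliases the currently
    monitored guest CR3 cannot be monitored itself (big pages excepted), since
    combined CR3 + PT monitoring of one physical page isn't supported there. */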
2281# ifdef PGMPOOL_WITH_CACHE
2282 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2283# endif
2284 if (fCanBeMonitored)
2285 {
2286# ifdef PGMPOOL_WITH_MONITORING
2287 rc = pgmPoolMonitorInsert(pPool, pPage);
2288 if (rc == VERR_PGM_POOL_CLEARED)
2289 {
2290 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2291# ifndef PGMPOOL_WITH_CACHE
2292 pgmPoolMonitorFlush(pPool, pPage);
2293 rc = VERR_PGM_POOL_FLUSHED;
2294# endif
2295 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2296 pUser[i].iNext = pPool->iUserFreeHead;
2297 pUser[i].iUser = NIL_PGMPOOL_IDX;
2298 pPool->iUserFreeHead = i;
2299 }
2300 }
2301# endif
2302#endif /* PGMPOOL_WITH_MONITORING */
2303 return rc;
2304}
2305
2306
2307# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2308/**
2309 * Adds a user reference to a page.
2310 *
2311 * This will add another user record to the page and tell the cache that
2312 * the page has been used, moving it to the head of the age list.
2313 *
2314 * @returns VBox status code.
2315 * @retval VINF_SUCCESS if successfully added.
2316 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2317 * @param pPool The pool.
2318 * @param pPage The cached page.
2319 * @param iUser The user index.
2320 * @param iUserTable The user table.
2321 */
2322static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2323{
2324 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2325
2326 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2327# ifdef VBOX_STRICT
2328 /*
2329 * Check that the entry doesn't already exist.
2330 */
2331 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2332 {
2333 uint16_t i = pPage->iUserHead;
2334 do
2335 {
2336 Assert(i < pPool->cMaxUsers);
2337 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2338 i = paUsers[i].iNext;
2339 } while (i != NIL_PGMPOOL_USER_INDEX);
2340 }
2341# endif
2342
2343 /*
2344 * Allocate a user node.
2345 */
2346 uint16_t i = pPool->iUserFreeHead;
2347 if (i == NIL_PGMPOOL_USER_INDEX)
2348 {
2349 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2350 if (RT_FAILURE(rc))
2351 return rc;
2352 i = pPool->iUserFreeHead;
2353 }
2354 pPool->iUserFreeHead = paUsers[i].iNext;
2355
2356 /*
2357 * Initialize the user node and insert it.
2358 */
2359 paUsers[i].iNext = pPage->iUserHead;
2360 paUsers[i].iUser = iUser;
2361 paUsers[i].iUserTable = iUserTable;
2362 pPage->iUserHead = i;
2363
2364# ifdef PGMPOOL_WITH_CACHE
2365 /*
2366 * Tell the cache to update its replacement stats for this page.
2367 */
2368 pgmPoolCacheUsed(pPool, pPage);
2369# endif
2370 return VINF_SUCCESS;
2371}
2372# endif /* PGMPOOL_WITH_CACHE */
2373
2374
2375/**
2376 * Frees a user record associated with a page.
2377 *
2378 * This does not clear the entry in the user table, it simply returns the
2379 * user record to the chain of free records.
2380 *
2381 * @param pPool The pool.
2382 * @param pPage The shadow page.
2383 * @param iUser The shadow page pool index of the user table.
2384 * @param iUserTable The index into the user table (shadowed).
2385 */
2386static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2387{
2388 /*
2389 * Unlink and free the specified user entry.
2390 */
2391 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2392
2393 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2394 uint16_t i = pPage->iUserHead;
2395 if ( i != NIL_PGMPOOL_USER_INDEX
2396 && paUsers[i].iUser == iUser
2397 && paUsers[i].iUserTable == iUserTable)
2398 {
2399 pPage->iUserHead = paUsers[i].iNext;
2400
2401 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2402 paUsers[i].iNext = pPool->iUserFreeHead;
2403 pPool->iUserFreeHead = i;
2404 return;
2405 }
2406
2407 /* General: Linear search. */
2408 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2409 while (i != NIL_PGMPOOL_USER_INDEX)
2410 {
2411 if ( paUsers[i].iUser == iUser
2412 && paUsers[i].iUserTable == iUserTable)
2413 {
2414 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2415 paUsers[iPrev].iNext = paUsers[i].iNext;
2416 else
2417 pPage->iUserHead = paUsers[i].iNext;
2418
2419 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2420 paUsers[i].iNext = pPool->iUserFreeHead;
2421 pPool->iUserFreeHead = i;
2422 return;
2423 }
2424 iPrev = i;
2425 i = paUsers[i].iNext;
2426 }
2427
2428 /* Fatal: didn't find it */
2429 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2430 iUser, iUserTable, pPage->GCPhys));
2431}
2432
2433
2434/**
2435 * Gets the entry size of a shadow table.
2436 *
2437 * @param enmKind The kind of page.
2438 *
2439 * @returns The size of the entry in bytes. That is, 4 or 8.
2440 * @returns If the kind is not for a table, an assertion is raised and 0 is
2441 * returned.
2442 */
2443DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2444{
2445 switch (enmKind)
2446 {
2447 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2448 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2449 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2450#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2451 case PGMPOOLKIND_32BIT_PD:
2452 case PGMPOOLKIND_32BIT_PD_PHYS:
2453#else
2454 case PGMPOOLKIND_ROOT_32BIT_PD:
2455#endif
2456 return 4;
2457
2458 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2459 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2460 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2461 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2462 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2463 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2464 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2465 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2466 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2467 case PGMPOOLKIND_64BIT_PML4:
2468#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
2469 case PGMPOOLKIND_ROOT_PAE_PD:
2470 case PGMPOOLKIND_ROOT_PDPT:
2471#endif
2472 case PGMPOOLKIND_PAE_PDPT:
2473 case PGMPOOLKIND_ROOT_NESTED:
2474 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2475 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2476 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2477 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2478 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2479 case PGMPOOLKIND_PAE_PD_PHYS:
2480 case PGMPOOLKIND_PAE_PDPT_PHYS:
2481 return 8;
2482
2483 default:
2484 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2485 }
2486}
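/* Illustration only: the entry size is what one would use to derive the number
 * of entries in a shadow table, roughly along these lines (hypothetical local):
 *     unsigned cEntries = PAGE_SIZE / pgmPoolTrackGetShadowEntrySize(enmKind);
 * which gives 1024 entries for the 4-byte kinds and 512 for the 8-byte ones. */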
2487
2488
2489/**
2490 * Gets the entry size of a guest table.
2491 *
2492 * @param enmKind The kind of page.
2493 *
2494 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2495 * @returns If the kind is not for a table, an assertion is raised and 0 is
2496 * returned.
2497 */
2498DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2499{
2500 switch (enmKind)
2501 {
2502 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2503 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2504#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2505 case PGMPOOLKIND_32BIT_PD:
2506#else
2507 case PGMPOOLKIND_ROOT_32BIT_PD:
2508#endif
2509 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2510 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2511 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2512 return 4;
2513
2514 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2515 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2516 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2517 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2518 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2519 case PGMPOOLKIND_64BIT_PML4:
2520#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2521 case PGMPOOLKIND_PAE_PDPT:
2522#else
2523 case PGMPOOLKIND_ROOT_PAE_PD:
2524 case PGMPOOLKIND_ROOT_PDPT:
2525#endif
2526 return 8;
2527
2528 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2529 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2530 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2531 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2532 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2533 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2534 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2535 case PGMPOOLKIND_ROOT_NESTED:
2536 case PGMPOOLKIND_PAE_PD_PHYS:
2537 case PGMPOOLKIND_PAE_PDPT_PHYS:
2538 case PGMPOOLKIND_32BIT_PD_PHYS:
2539 /** @todo can we return 0? (nobody is calling this...) */
2540 AssertFailed();
2541 return 0;
2542
2543 default:
2544 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2545 }
2546}
2547
2548#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2549
2550/**
2551 * Scans one shadow page table for mappings of a physical page.
2552 *
2553 * @param pVM The VM handle.
2554 * @param pPhysPage The guest page in question.
2555 * @param iShw The shadow page table.
2556 * @param cRefs The number of references made in that PT.
2557 */
2558static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2559{
2560 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2561 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2562
2563 /*
2564 * Assert sanity.
2565 */
2566 Assert(cRefs == 1);
2567 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2568 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2569
2570 /*
2571 * Then, clear the actual mappings to the page in the shadow PT.
2572 */
2573 switch (pPage->enmKind)
2574 {
2575 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2576 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2577 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2578 {
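 /* Scan from the first present entry and clear every PTE mapping the physical
    page; stop as soon as the expected number of references (cRefs) is gone. */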
2579 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2580 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2581 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2582 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2583 {
2584 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2585 pPT->a[i].u = 0;
2586 cRefs--;
2587 if (!cRefs)
2588 return;
2589 }
2590#ifdef LOG_ENABLED
2591 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2592 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2593 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2594 {
2595 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2596 pPT->a[i].u = 0;
2597 }
2598#endif
2599 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2600 break;
2601 }
2602
2603 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2604 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2605 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2606 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2607 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2608 {
2609 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2610 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2611 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2612 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2613 {
2614 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2615 pPT->a[i].u = 0;
2616 cRefs--;
2617 if (!cRefs)
2618 return;
2619 }
2620#ifdef LOG_ENABLED
2621 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2622 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2623 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2624 {
2625 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2626 pPT->a[i].u = 0;
2627 }
2628#endif
2629 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
2630 break;
2631 }
2632
2633 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2634 {
2635 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2636 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2637 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2638 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2639 {
2640 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2641 pPT->a[i].u = 0;
2642 cRefs--;
2643 if (!cRefs)
2644 return;
2645 }
2646#ifdef LOG_ENABLED
2647 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2648 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2649 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2650 {
2651 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2652 pPT->a[i].u = 0;
2653 }
2654#endif
2655 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2656 break;
2657 }
2658
2659 default:
2660 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2661 }
2662}
2663
2664
2665/**
2666 * Scans one shadow page table for mappings of a physical page.
2667 *
2668 * @param pVM The VM handle.
2669 * @param pPhysPage The guest page in question.
2670 * @param iShw The shadow page table.
2671 * @param cRefs The number of references made in that PT.
2672 */
2673void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2674{
2675 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2676 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2677 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2678 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2679 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2680 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2681}
2682
2683
2684/**
2685 * Flushes a list of shadow page tables mapping the same physical page.
2686 *
2687 * @param pVM The VM handle.
2688 * @param pPhysPage The guest page in question.
2689 * @param iPhysExt The physical cross reference extent list to flush.
2690 */
2691void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2692{
2693 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2694 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2695 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2696
2697 const uint16_t iPhysExtStart = iPhysExt;
2698 PPGMPOOLPHYSEXT pPhysExt;
2699 do
2700 {
2701 Assert(iPhysExt < pPool->cMaxPhysExts);
2702 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2703 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2704 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2705 {
2706 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2707 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2708 }
2709
2710 /* next */
2711 iPhysExt = pPhysExt->iNext;
2712 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2713
2714 /* insert the list into the free list and clear the ram range entry. */
2715 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2716 pPool->iPhysExtFreeHead = iPhysExtStart;
2717 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2718
2719 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2720}
2721
2722#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2723
2724/**
2725 * Scans all shadow page tables for mappings of a physical page.
2726 *
2727 * This may be slow, but it's most likely more efficient than cleaning
2728 * out the entire page pool / cache.
2729 *
2730 * @returns VBox status code.
2731 * @retval VINF_SUCCESS if all references have been successfully cleared.
2732 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2733 * a page pool cleaning.
2734 *
2735 * @param pVM The VM handle.
2736 * @param pPhysPage The guest page in question.
2737 */
2738int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2739{
2740 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2741 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2742 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2743 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2744
2745#if 1
2746 /*
2747 * There is a limit to what makes sense.
2748 */
2749 if (pPool->cPresent > 1024)
2750 {
2751 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2752 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2753 return VINF_PGM_GCPHYS_ALIASED;
2754 }
2755#endif
2756
2757 /*
2758 * Iterate all the pages until we've encountered all those in use.
2759 * This is a simple but not quite optimal solution.
2760 */
2761 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2762 const uint32_t u32 = u64;
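 /* Precompute the PTE patterns we are looking for: the page's host physical
    address with the present bit set, in 64-bit (PAE) and 32-bit forms. */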
2763 unsigned cLeft = pPool->cUsedPages;
2764 unsigned iPage = pPool->cCurPages;
2765 while (--iPage >= PGMPOOL_IDX_FIRST)
2766 {
2767 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2768 if (pPage->GCPhys != NIL_RTGCPHYS)
2769 {
2770 switch (pPage->enmKind)
2771 {
2772 /*
2773 * We only care about shadow page tables.
2774 */
2775 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2776 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2777 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2778 {
2779 unsigned cPresent = pPage->cPresent;
2780 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2781 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2782 if (pPT->a[i].n.u1Present)
2783 {
2784 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2785 {
2786 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2787 pPT->a[i].u = 0;
2788 }
2789 if (!--cPresent)
2790 break;
2791 }
2792 break;
2793 }
2794
2795 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2796 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2797 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2798 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2799 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2800 {
2801 unsigned cPresent = pPage->cPresent;
2802 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2803 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2804 if (pPT->a[i].n.u1Present)
2805 {
2806 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2807 {
2808 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2809 pPT->a[i].u = 0;
2810 }
2811 if (!--cPresent)
2812 break;
2813 }
2814 break;
2815 }
2816 }
2817 if (!--cLeft)
2818 break;
2819 }
2820 }
2821
2822 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2823 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2824 return VINF_SUCCESS;
2825}
2826
2827
2828/**
2829 * Clears the user entry in a user table.
2830 *
2831 * This is used to remove all references to a page when flushing it.
2832 */
2833static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2834{
2835 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2836 Assert(pUser->iUser < pPool->cCurPages);
2837 uint32_t iUserTable = pUser->iUserTable;
2838
2839 /*
2840 * Map the user page.
2841 */
2842 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2843#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2844 if (pUserPage->enmKind == PGMPOOLKIND_ROOT_PAE_PD)
2845 {
2846 /* Must translate the fake 2048 entry PD to a 512 PD one since the R0 mapping is not linear. */
2847 Assert(pUser->iUser == PGMPOOL_IDX_PAE_PD);
2848 uint32_t iPdpt = iUserTable / X86_PG_PAE_ENTRIES;
2849 iUserTable %= X86_PG_PAE_ENTRIES;
2850 pUserPage = &pPool->aPages[PGMPOOL_IDX_PAE_PD_0 + iPdpt];
2851 Assert(pUserPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD);
2852 }
2853#endif
2854 union
2855 {
2856 uint64_t *pau64;
2857 uint32_t *pau32;
2858 } u;
2859 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
2860
2861 /* Safety precaution in case we change the paging for other modes too in the future. */
2862 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
2863
2864#ifdef VBOX_STRICT
2865 /*
2866 * Some sanity checks.
2867 */
2868 switch (pUserPage->enmKind)
2869 {
2870# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2871 case PGMPOOLKIND_32BIT_PD:
2872 case PGMPOOLKIND_32BIT_PD_PHYS:
2873 Assert(iUserTable < X86_PG_ENTRIES);
2874 break;
2875# else
2876 case PGMPOOLKIND_ROOT_32BIT_PD:
2877 Assert(iUserTable < X86_PG_ENTRIES);
2878 Assert(!(u.pau32[iUserTable] & PGM_PDFLAGS_MAPPING));
2879 break;
2880# endif
2881# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2882 case PGMPOOLKIND_ROOT_PAE_PD:
2883 Assert(iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2884 AssertMsg(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING), ("%llx %d\n", u.pau64[iUserTable], iUserTable));
2885 break;
2886# endif
2887# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2888 case PGMPOOLKIND_PAE_PDPT:
2889 case PGMPOOLKIND_PAE_PDPT_PHYS:
2890# else
2891 case PGMPOOLKIND_ROOT_PDPT:
2892# endif
2893 Assert(iUserTable < 4);
2894 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2895 break;
2896 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2897 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2898 case PGMPOOLKIND_PAE_PD_PHYS:
2899 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2900 break;
2901 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2902 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2903 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
2904 break;
2905 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2906 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2907 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2908 break;
2909 case PGMPOOLKIND_64BIT_PML4:
2910 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
2911 /* GCPhys >> PAGE_SHIFT is the index here */
2912 break;
2913 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2914 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2915 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2916 break;
2917
2918 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2919 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2920 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2921 break;
2922
2923 case PGMPOOLKIND_ROOT_NESTED:
2924 Assert(iUserTable < X86_PG_PAE_ENTRIES);
2925 break;
2926
2927 default:
2928 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2929 break;
2930 }
2931#endif /* VBOX_STRICT */
2932
2933 /*
2934 * Clear the entry in the user page.
2935 */
2936 switch (pUserPage->enmKind)
2937 {
2938 /* 32-bit entries */
2939#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2940 case PGMPOOLKIND_32BIT_PD:
2941 case PGMPOOLKIND_32BIT_PD_PHYS:
2942#else
2943 case PGMPOOLKIND_ROOT_32BIT_PD:
2944#endif
2945 u.pau32[iUserTable] = 0;
2946 break;
2947
2948 /* 64-bit entries */
2949 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2950 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2951 case PGMPOOLKIND_PAE_PD_PHYS:
2952 case PGMPOOLKIND_PAE_PDPT_PHYS:
2953 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2954 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2955 case PGMPOOLKIND_64BIT_PML4:
2956 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2957 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2958# if !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2959 case PGMPOOLKIND_ROOT_PAE_PD:
2960#endif
2961#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2962 case PGMPOOLKIND_PAE_PDPT:
2963#else
2964 case PGMPOOLKIND_ROOT_PDPT:
2965#endif
2966 case PGMPOOLKIND_ROOT_NESTED:
2967 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2968 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2969 u.pau64[iUserTable] = 0;
2970 break;
2971
2972 default:
2973 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2974 }
2975}
2976
2977
2978/**
2979 * Clears all users of a page.
2980 */
2981static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2982{
2983 /*
2984 * Free all the user records.
2985 */
2986 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2987 uint16_t i = pPage->iUserHead;
2988 while (i != NIL_PGMPOOL_USER_INDEX)
2989 {
2990 /* Clear the entry in the user table. */
2991 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2992
2993 /* Free it. */
2994 const uint16_t iNext = paUsers[i].iNext;
2995 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2996 paUsers[i].iNext = pPool->iUserFreeHead;
2997 pPool->iUserFreeHead = i;
2998
2999 /* Next. */
3000 i = iNext;
3001 }
3002 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3003}
3004
3005#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3006
3007/**
3008 * Allocates a new physical cross reference extent.
3009 *
3010 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3011 * @param pVM The VM handle.
3012 * @param piPhysExt Where to store the phys ext index.
3013 */
3014PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3015{
3016 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3017 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3018 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3019 {
3020 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3021 return NULL;
3022 }
3023 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3024 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3025 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3026 *piPhysExt = iPhysExt;
3027 return pPhysExt;
3028}
3029
3030
3031/**
3032 * Frees a physical cross reference extent.
3033 *
3034 * @param pVM The VM handle.
3035 * @param iPhysExt The extent to free.
3036 */
3037void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3038{
3039 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3040 Assert(iPhysExt < pPool->cMaxPhysExts);
3041 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3042 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3043 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3044 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3045 pPool->iPhysExtFreeHead = iPhysExt;
3046}
3047
3048
3049/**
3050 * Frees a list of physical cross reference extents.
3051 *
3052 * @param pVM The VM handle.
3053 * @param iPhysExt The index of the first extent in the list to free.
3054 */
3055void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3056{
3057 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3058
3059 const uint16_t iPhysExtStart = iPhysExt;
3060 PPGMPOOLPHYSEXT pPhysExt;
3061 do
3062 {
3063 Assert(iPhysExt < pPool->cMaxPhysExts);
3064 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3065 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3066 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3067
3068 /* next */
3069 iPhysExt = pPhysExt->iNext;
3070 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3071
3072 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3073 pPool->iPhysExtFreeHead = iPhysExtStart;
3074}
3075
3076
3077/**
3078 * Insert a reference into a list of physical cross reference extents.
3079 *
3080 * @returns The new ram range flags (top 16-bits).
3081 *
3082 * @param pVM The VM handle.
3083 * @param iPhysExt The physical extent index of the list head.
3084 * @param iShwPT The shadow page table index.
3085 *
3086 */
3087static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3088{
3089 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3090 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3091
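 /* The 16-bit return value packs the extent (list head) index into the
    MM_RAM_FLAGS_IDX bits and the MM_RAM_FLAGS_CREFS_PHYSEXT marker into the
    CREFS bits, i.e. the format the callers keep in the top 16 bits of the
    ram range page entry (HCPhys). */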
3092 /* special common case. */
3093 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3094 {
3095 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3096 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3097 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
3098 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3099 }
3100
3101 /* general treatment. */
3102 const uint16_t iPhysExtStart = iPhysExt;
3103 unsigned cMax = 15;
3104 for (;;)
3105 {
3106 Assert(iPhysExt < pPool->cMaxPhysExts);
3107 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3108 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3109 {
3110 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3111 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3112 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3113 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3114 }
3115 if (!--cMax)
3116 {
3117 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3118 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3119 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
3120 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3121 }
3122 }
3123
3124 /* add another extent to the list. */
3125 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3126 if (!pNew)
3127 {
3128 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3129 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3130 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3131 }
3132 pNew->iNext = iPhysExtStart;
3133 pNew->aidx[0] = iShwPT;
3134 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3135 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3136}
3137
3138
3139/**
3140 * Add a reference to a guest physical page where extents are in use.
3141 *
3142 * @returns The new ram range flags (top 16-bits).
3143 *
3144 * @param pVM The VM handle.
3145 * @param u16 The ram range flags (top 16-bits).
3146 * @param iShwPT The shadow page table index.
3147 */
3148uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3149{
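 /* Three cases: a single plain reference gets converted into a new extent
    list; an existing extent list has the new reference inserted into it; and
    an entry that already overflowed stays overflowed (only stats are bumped). */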
3150 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
3151 {
3152 /*
3153 * Convert to extent list.
3154 */
3155 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
3156 uint16_t iPhysExt;
3157 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3158 if (pPhysExt)
3159 {
3160 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
3161 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3162 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
3163 pPhysExt->aidx[1] = iShwPT;
3164 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3165 }
3166 else
3167 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
3168 }
3169 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
3170 {
3171 /*
3172 * Insert into the extent list.
3173 */
3174 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
3175 }
3176 else
3177 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3178 return u16;
3179}
3180
3181
3182/**
3183 * Clear references to guest physical memory.
3184 *
3185 * @param pPool The pool.
3186 * @param pPage The page.
3187 * @param pPhysPage Pointer to the aPages entry in the ram range.
3188 */
3189void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3190{
3191 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3192 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3193
3194 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3195 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3196 {
3197 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3198 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3199 do
3200 {
3201 Assert(iPhysExt < pPool->cMaxPhysExts);
3202
3203 /*
3204 * Look for the shadow page and check if it's all freed.
3205 */
3206 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3207 {
3208 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3209 {
3210 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3211
3212 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3213 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3214 {
3215 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3216 return;
3217 }
3218
3219 /* we can free the node. */
3220 PVM pVM = pPool->CTX_SUFF(pVM);
3221 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3222 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3223 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3224 {
3225 /* lonely node */
3226 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3227 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3228 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3229 }
3230 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3231 {
3232 /* head */
3233 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3234 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3235 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3236 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3237 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3238 }
3239 else
3240 {
3241 /* in list */
3242 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3243 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3244 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3245 }
3246 iPhysExt = iPhysExtNext;
3247 return;
3248 }
3249 }
3250
3251 /* next */
3252 iPhysExtPrev = iPhysExt;
3253 iPhysExt = paPhysExts[iPhysExt].iNext;
3254 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3255
3256 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3257 }
3258 else /* nothing to do */
3259 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3260}
3261
3262
3263/**
3264 * Clear references to guest physical memory.
3265 *
3266 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3267 * is assumed to be correct, so the linear search can be skipped and we can assert
3268 * at an earlier point.
3269 *
3270 * @param pPool The pool.
3271 * @param pPage The page.
3272 * @param HCPhys The host physical address corresponding to the guest page.
3273 * @param GCPhys The guest physical address corresponding to HCPhys.
3274 */
3275static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3276{
3277 /*
3278 * Walk range list.
3279 */
3280 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3281 while (pRam)
3282 {
3283 RTGCPHYS off = GCPhys - pRam->GCPhys;
3284 if (off < pRam->cb)
3285 {
3286 /* does it match? */
3287 const unsigned iPage = off >> PAGE_SHIFT;
3288 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3289#ifdef LOG_ENABLED
3290RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3291Log(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3292#endif
3293 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3294 {
3295 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3296 return;
3297 }
3298 break;
3299 }
3300 pRam = pRam->CTX_SUFF(pNext);
3301 }
3302 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3303}
3304
3305
3306/**
3307 * Clear references to guest physical memory.
3308 *
3309 * @param pPool The pool.
3310 * @param pPage The page.
3311 * @param HCPhys The host physical address corresponding to the guest page.
3312 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3313 */
3314static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3315{
3316 /*
3317 * Walk range list.
3318 */
3319 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3320 while (pRam)
3321 {
3322 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3323 if (off < pRam->cb)
3324 {
3325 /* does it match? */
3326 const unsigned iPage = off >> PAGE_SHIFT;
3327 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3328 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3329 {
3330 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3331 return;
3332 }
3333 break;
3334 }
3335 pRam = pRam->CTX_SUFF(pNext);
3336 }
3337
3338 /*
3339 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3340 */
3341 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3342 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3343 while (pRam)
3344 {
3345 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3346 while (iPage-- > 0)
3347 {
3348 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3349 {
3350 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3351 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3352 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3353 return;
3354 }
3355 }
3356 pRam = pRam->CTX_SUFF(pNext);
3357 }
3358
3359 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3360}
3361
3362
3363/**
3364 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3365 *
3366 * @param pPool The pool.
3367 * @param pPage The page.
3368 * @param pShwPT The shadow page table (mapping of the page).
3369 * @param pGstPT The guest page table.
3370 */
3371DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3372{
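    /* Scan from the first present entry and stop as soon as all present
       PTEs (pPage->cPresent of them) have been dereferenced. */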
3373 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3374 if (pShwPT->a[i].n.u1Present)
3375 {
3376 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3377 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3378 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3379 if (!--pPage->cPresent)
3380 break;
3381 }
3382}
3383
3384
3385/**
3386 * Clear references to guest physical memory in a PAE / 32-bit page table.
3387 *
3388 * @param pPool The pool.
3389 * @param pPage The page.
3390 * @param pShwPT The shadow page table (mapping of the page).
3391 * @param pGstPT The guest page table (just a half one).
3392 */
3393DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3394{
3395 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3396 if (pShwPT->a[i].n.u1Present)
3397 {
3398            Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3399 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3400 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3401 }
3402}
3403
3404
3405/**
3406 * Clear references to guest physical memory in a PAE / PAE page table.
3407 *
3408 * @param pPool The pool.
3409 * @param pPage The page.
3410 * @param pShwPT The shadow page table (mapping of the page).
3411 * @param pGstPT The guest page table.
3412 */
3413DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3414{
3415 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3416 if (pShwPT->a[i].n.u1Present)
3417 {
3418            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3419 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3420 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3421 }
3422}
3423
3424
3425/**
3426 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3427 *
3428 * @param pPool The pool.
3429 * @param pPage The page.
3430 * @param pShwPT The shadow page table (mapping of the page).
3431 */
3432DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3433{
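    /* The shadow PT covers a single contiguous guest physical range (a 4MB
       mapping), so each entry's guest address is pPage->GCPhys plus the
       entry's offset; no guest page table needs to be consulted. */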
3434 RTGCPHYS GCPhys = pPage->GCPhys;
3435 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3436 if (pShwPT->a[i].n.u1Present)
3437 {
3438 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3439 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3440 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3441 }
3442}
3443
3444
3445/**
3446 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3447 *
3448 * @param pPool The pool.
3449 * @param pPage The page.
3450 * @param pShwPT The shadow page table (mapping of the page).
3451 */
3452DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3453{
3454 RTGCPHYS GCPhys = pPage->GCPhys;
3455 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3456 if (pShwPT->a[i].n.u1Present)
3457 {
3458            Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 GCPhys=%RGp\n",
3459 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3460 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3461 }
3462}
3463
3464#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3465
3466
3467#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3468/**
3469 * Clear references to shadowed pages in a 32-bit page directory.
3470 *
3471 * @param pPool The pool.
3472 * @param pPage The page.
3473 * @param pShwPD The shadow page directory (mapping of the page).
3474 */
3475DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
3476{
3477 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3478 {
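        /* Skip PDEs that are part of hypervisor mappings (PGM_PDFLAGS_MAPPING);
           those entries are not backed by pool pages. */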
3479 if ( pShwPD->a[i].n.u1Present
3480 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3481 )
3482 {
3483 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
3484 if (pSubPage)
3485 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3486 else
3487 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
3488 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3489 }
3490 }
3491}
3492#endif
3493
3494/**
3495 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3496 *
3497 * @param pPool The pool.
3498 * @param pPage The page.
3499 * @param pShwPD The shadow page directory (mapping of the page).
3500 */
3501DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3502{
3503 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3504 {
3505 if ( pShwPD->a[i].n.u1Present
3506#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3507 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
3508#endif
3509 )
3510 {
3511 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3512 if (pSubPage)
3513 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3514 else
3515 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3516 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3517 }
3518 }
3519}
3520
3521
3522/**
3523 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3524 *
3525 * @param pPool The pool.
3526 * @param pPage The page.
3527 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3528 */
3529DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3530{
3531 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3532 {
3533 if ( pShwPDPT->a[i].n.u1Present
3534#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3535 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
3536#endif
3537 )
3538 {
3539 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3540 if (pSubPage)
3541 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3542 else
3543 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3544 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3545 }
3546 }
3547}
3548
3549
3550/**
3551 * Clear references to shadowed pages in a 64-bit level 4 page table.
3552 *
3553 * @param pPool The pool.
3554 * @param pPage The page.
3555 * @param pShwPML4 The shadow PML4 table (mapping of the page).
3556 */
3557DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3558{
3559 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3560 {
3561 if (pShwPML4->a[i].n.u1Present)
3562 {
3563 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3564 if (pSubPage)
3565 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3566 else
3567 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3568 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3569 }
3570 }
3571}
3572
3573
3574/**
3575 * Clear references to shadowed pages in an EPT page table.
3576 *
3577 * @param pPool The pool.
3578 * @param pPage The page.
3579 * @param pShwPT The shadow page table (mapping of the page).
3580 */
3581DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3582{
3583 RTGCPHYS GCPhys = pPage->GCPhys;
3584 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3585 if (pShwPT->a[i].n.u1Present)
3586 {
3587 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3588                  i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3589 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3590 }
3591}
3592
3593
3594/**
3595 * Clear references to shadowed pages in an EPT page directory.
3596 *
3597 * @param pPool The pool.
3598 * @param pPage The page.
3599 * @param pShwPD The shadow page directory (mapping of the page).
3600 */
3601DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3602{
3603 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3604 {
3605 if (pShwPD->a[i].n.u1Present)
3606 {
3607 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3608 if (pSubPage)
3609 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3610 else
3611 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3612 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3613 }
3614 }
3615}
3616
3617
3618/**
3619 * Clear references to shadowed pages in an EPT page directory pointer table.
3620 *
3621 * @param pPool The pool.
3622 * @param pPage The page.
3623 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3624 */
3625DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3626{
3627 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3628 {
3629 if (pShwPDPT->a[i].n.u1Present)
3630 {
3631 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3632 if (pSubPage)
3633 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3634 else
3635 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3636 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3637 }
3638 }
3639}
3640
3641
3642/**
3643 * Clears all references made by this page.
3644 *
3645 * This includes other shadow pages and GC physical addresses.
3646 *
3647 * @param pPool The pool.
3648 * @param pPage The page.
3649 */
3650static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3651{
3652 /*
3653 * Map the shadow page and take action according to the page kind.
3654 */
3655 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3656 switch (pPage->enmKind)
3657 {
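        /* Leaf page tables drop their references to guest physical pages,
           while page directories, PDPTs and PML4s drop references to the
           child pool pages they point to. */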
3658#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3659 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3660 {
3661 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3662 void *pvGst;
3663 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3664 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3665 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3666 break;
3667 }
3668
3669 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3670 {
3671 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3672 void *pvGst;
3673 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3674 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3675 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3676 break;
3677 }
3678
3679 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3680 {
3681 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3682 void *pvGst;
3683 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3684 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3685 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3686 break;
3687 }
3688
3689 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3690 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3691 {
3692 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3693 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3694 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3695 break;
3696 }
3697
3698 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3699 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3700 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3701 {
3702 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3703 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3704 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3705 break;
3706 }
3707
3708#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3709 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3710 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3711 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3712 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3713 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3714 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3715 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3716 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3717 break;
3718#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3719
3720 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3721 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3722 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3723 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3724 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3725 break;
3726
3727#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3728 case PGMPOOLKIND_32BIT_PD:
3729 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
3730 break;
3731
3732 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3733 case PGMPOOLKIND_PAE_PDPT:
3734#endif
3735 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3736 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3737 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3738 break;
3739
3740 case PGMPOOLKIND_64BIT_PML4:
3741 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3742 break;
3743
3744 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3745 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3746 break;
3747
3748 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3749 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3750 break;
3751
3752 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3753 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3754 break;
3755
3756 default:
3757 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3758 }
3759
3760    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3761 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3762 ASMMemZeroPage(pvShw);
3763 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3764 pPage->fZeroed = true;
3765}
3766
3767#endif /* PGMPOOL_WITH_USER_TRACKING */
3768
3769/**
3770 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3771 *
3772 * @param pPool The pool.
3773 */
3774static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3775{
3776#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3777 /* Start a subset so we won't run out of mapping space. */
3778 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
3779 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3780#endif
3781
3782 /*
3783 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
3784 */
3785 Assert(NIL_PGMPOOL_IDX == 0);
3786 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3787 {
3788 /*
3789 * Get the page address.
3790 */
3791 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3792 union
3793 {
3794 uint64_t *pau64;
3795 uint32_t *pau32;
3796 } u;
3797
3798 /*
3799 * Mark stuff not present.
3800 */
3801 switch (pPage->enmKind)
3802 {
3803#ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
3804 case PGMPOOLKIND_ROOT_32BIT_PD:
3805 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3806 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3807 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3808 u.pau32[iPage] = 0;
3809 break;
3810
3811 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3812 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3813 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
3814 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3815 u.pau64[iPage] = 0;
3816 break;
3817
3818 case PGMPOOLKIND_ROOT_PDPT:
3819 /* Not root of shadowed pages currently, ignore it. */
3820 break;
3821#endif
3822
3823 case PGMPOOLKIND_ROOT_NESTED:
3824 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3825 ASMMemZero32(u.pau64, PAGE_SIZE);
3826 break;
3827 }
3828 }
3829
3830 /*
3831 * Paranoia (to be removed), flag a global CR3 sync.
3832 */
3833 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
3834
3835#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3836 /* Pop the subset. */
3837 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3838#endif
3839}
3840
3841
3842/**
3843 * Flushes the entire cache.
3844 *
3845 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3846 * and will execute the CR3 flush.
3847 *
3848 * @param pPool The pool.
3849 */
3850static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3851{
3852 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3853 LogFlow(("pgmPoolFlushAllInt:\n"));
3854
3855 /*
3856 * If there are no pages in the pool, there is nothing to do.
3857 */
3858 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3859 {
3860 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3861 return;
3862 }
3863
3864 /*
3865 * Nuke the free list and reinsert all pages into it.
3866 */
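    /* Each page is reset to the FREE state and chained to its successor
       (iNext = i + 1); the tail is terminated and the free list head
       reattached right after the loop. */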
3867 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3868 {
3869 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3870
3871#ifdef IN_RING3
3872 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
3873#endif
3874#ifdef PGMPOOL_WITH_MONITORING
3875 if (pPage->fMonitored)
3876 pgmPoolMonitorFlush(pPool, pPage);
3877 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3878 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3879 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3880 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3881 pPage->cModifications = 0;
3882#endif
3883 pPage->GCPhys = NIL_RTGCPHYS;
3884 pPage->enmKind = PGMPOOLKIND_FREE;
3885 Assert(pPage->idx == i);
3886 pPage->iNext = i + 1;
3887 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3888 pPage->fSeenNonGlobal = false;
3889        pPage->fMonitored = false;
3890 pPage->fCached = false;
3891 pPage->fReusedFlushPending = false;
3892 pPage->fCR3Mix = false;
3893#ifdef PGMPOOL_WITH_USER_TRACKING
3894 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3895#endif
3896#ifdef PGMPOOL_WITH_CACHE
3897 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3898 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3899#endif
3900 }
3901 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3902 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3903 pPool->cUsedPages = 0;
3904
3905#ifdef PGMPOOL_WITH_USER_TRACKING
3906 /*
3907 * Zap and reinitialize the user records.
3908 */
3909 pPool->cPresent = 0;
3910 pPool->iUserFreeHead = 0;
3911 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3912 const unsigned cMaxUsers = pPool->cMaxUsers;
3913 for (unsigned i = 0; i < cMaxUsers; i++)
3914 {
3915 paUsers[i].iNext = i + 1;
3916 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3917 paUsers[i].iUserTable = 0xfffffffe;
3918 }
3919 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3920#endif
3921
3922#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3923 /*
3924 * Clear all the GCPhys links and rebuild the phys ext free list.
3925 */
3926 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3927 pRam;
3928 pRam = pRam->CTX_SUFF(pNext))
3929 {
3930 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3931 while (iPage-- > 0)
3932 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3933 }
3934
3935 pPool->iPhysExtFreeHead = 0;
3936 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3937 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3938 for (unsigned i = 0; i < cMaxPhysExts; i++)
3939 {
3940 paPhysExts[i].iNext = i + 1;
3941 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3942 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3943 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3944 }
3945 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3946#endif
3947
3948#ifdef PGMPOOL_WITH_MONITORING
3949 /*
3950 * Just zap the modified list.
3951 */
3952 pPool->cModifiedPages = 0;
3953 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3954#endif
3955
3956#ifdef PGMPOOL_WITH_CACHE
3957 /*
3958 * Clear the GCPhys hash and the age list.
3959 */
3960 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3961 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3962 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3963 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3964#endif
3965
3966 /*
3967 * Flush all the special root pages.
3968 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3969 */
3970 pgmPoolFlushAllSpecialRoots(pPool);
3971 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3972 {
3973 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3974 pPage->iNext = NIL_PGMPOOL_IDX;
3975#ifdef PGMPOOL_WITH_MONITORING
3976 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3977 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3978 pPage->cModifications = 0;
3979 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3980 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3981 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3982 if (pPage->fMonitored)
3983 {
3984 PVM pVM = pPool->CTX_SUFF(pVM);
3985 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3986 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3987 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3988 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
3989 pPool->pszAccessHandler);
3990 AssertFatalRCSuccess(rc);
3991# ifdef PGMPOOL_WITH_CACHE
3992 pgmPoolHashInsert(pPool, pPage);
3993# endif
3994 }
3995#endif
3996#ifdef PGMPOOL_WITH_USER_TRACKING
3997 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3998#endif
3999#ifdef PGMPOOL_WITH_CACHE
4000 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4001 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4002#endif
4003 }
4004
4005 /*
4006 * Finally, assert the FF.
4007 */
4008 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
4009
4010 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
4011}
4012
4013
4014/**
4015 * Flushes a pool page.
4016 *
4017 * This moves the page to the free list after removing all user references to it.
4018 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
4019 *
4020 * @returns VBox status code.
4021 * @retval VINF_SUCCESS on success.
4022 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
4023 * @param pPool The pool.
4024 * @param pPage The shadow page.
4025 */
4026int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4027{
4028 int rc = VINF_SUCCESS;
4029 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4030 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%d, .GCPhys=%RGp}\n",
4031 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
4032
4033 /*
4034 * Quietly reject any attempts at flushing any of the special root pages.
4035 */
4036 if (pPage->idx < PGMPOOL_IDX_FIRST)
4037 {
4038 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4039 return VINF_SUCCESS;
4040 }
4041
4042 /*
4043 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4044 */
4045 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
4046 {
4047#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4048 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4,
4049                  ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d)\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
4050#endif
4051 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4052 return VINF_SUCCESS;
4053 }
4054
4055#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4056 /* Start a subset so we won't run out of mapping space. */
4057 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
4058 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4059#endif
4060
4061 /*
4062     * Mark the page as being in need of an ASMMemZeroPage().
4063 */
4064 pPage->fZeroed = false;
4065
4066#ifdef PGMPOOL_WITH_USER_TRACKING
4067 /*
4068 * Clear the page.
4069 */
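    /* First remove the references other shadow tables hold to this page
       (user tracking), then drop the references this page itself holds to
       guest physical pages and/or child pool pages. */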
4070 pgmPoolTrackClearPageUsers(pPool, pPage);
4071 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4072 pgmPoolTrackDeref(pPool, pPage);
4073 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4074#endif
4075
4076#ifdef PGMPOOL_WITH_CACHE
4077 /*
4078 * Flush it from the cache.
4079 */
4080 pgmPoolCacheFlushPage(pPool, pPage);
4081#endif /* PGMPOOL_WITH_CACHE */
4082
4083#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4084 /* Heavy stuff done. */
4085 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4086#endif
4087
4088#ifdef PGMPOOL_WITH_MONITORING
4089 /*
4090     * Deregister the monitoring.
4091 */
4092 if (pPage->fMonitored)
4093 rc = pgmPoolMonitorFlush(pPool, pPage);
4094#endif
4095
4096 /*
4097 * Free the page.
4098 */
4099 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4100 pPage->iNext = pPool->iFreeHead;
4101 pPool->iFreeHead = pPage->idx;
4102 pPage->enmKind = PGMPOOLKIND_FREE;
4103 pPage->GCPhys = NIL_RTGCPHYS;
4104 pPage->fReusedFlushPending = false;
4105
4106 pPool->cUsedPages--;
4107 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4108 return rc;
4109}
4110
4111
4112/**
4113 * Frees a usage of a pool page.
4114 *
4115 * The caller is responsible for updating the user table so that it no longer
4116 * references the shadow page.
4117 *
4118 * @param pPool The pool.
4119 * @param pPage The shadow page.
4120 * @param iUser The shadow page pool index of the user table.
4121 * @param iUserTable The index into the user table (shadowed).
4122 */
4123void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4124{
4125 STAM_PROFILE_START(&pPool->StatFree, a);
4126 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
4127 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
4128 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4129#ifdef PGMPOOL_WITH_USER_TRACKING
4130 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4131#endif
4132#ifdef PGMPOOL_WITH_CACHE
4133 if (!pPage->fCached)
4134#endif
4135 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
4136 STAM_PROFILE_STOP(&pPool->StatFree, a);
4137}
4138
4139
4140/**
4141 * Makes sure one or more free pages are available.
4142 *
4143 * @returns VBox status code.
4144 * @retval VINF_SUCCESS on success.
4145 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4146 *
4147 * @param pPool The pool.
4148 * @param iUser The user of the page.
4149 */
4150static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
4151{
4152 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4153
4154 /*
4155     * If the pool isn't fully grown yet, expand it.
4156 */
4157 if (pPool->cCurPages < pPool->cMaxPages)
4158 {
4159 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4160#ifdef IN_RING3
4161 int rc = PGMR3PoolGrow(pPool->pVMR3);
4162#else
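        /* In ring-0 and raw-mode context the pool can only be grown by ring-3,
           so bounce there via the call-host interface. */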
4163 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
4164#endif
4165 if (RT_FAILURE(rc))
4166 return rc;
4167 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4168 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4169 return VINF_SUCCESS;
4170 }
4171
4172#ifdef PGMPOOL_WITH_CACHE
4173 /*
4174 * Free one cached page.
4175 */
4176 return pgmPoolCacheFreeOne(pPool, iUser);
4177#else
4178 /*
4179 * Flush the pool.
4180 *
4181 * If we have tracking enabled, it should be possible to come up with
4182 * a cheap replacement strategy...
4183 */
4184 /* @todo incompatible with long mode paging (cr3 root will be flushed) */
4185    Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
4186 pgmPoolFlushAllInt(pPool);
4187 return VERR_PGM_POOL_FLUSHED;
4188#endif
4189}
4190
4191
4192/**
4193 * Allocates a page from the pool.
4194 *
4195 * This page may actually be a cached page and not in need of any processing
4196 * on the caller's part.
4197 *
4198 * @returns VBox status code.
4199 * @retval VINF_SUCCESS if a NEW page was allocated.
4200 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4201 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4202 * @param pVM The VM handle.
4203 * @param GCPhys The GC physical address of the page we're gonna shadow.
4204 * For 4MB and 2MB PD entries, it's the first address the
4205 * shadow PT is covering.
4206 * @param enmKind The kind of mapping.
4207 * @param iUser The shadow page pool index of the user table.
4208 * @param iUserTable The index into the user table (shadowed).
4209 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
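 *
 * @remarks Hypothetical usage sketch (illustrative only; GCPhysPT, pPdPoolPage,
 *          iPde and pShwPde are assumed names, not taken from this file;
 *          pShwPde is assumed to be the PAE shadow PDE being set up):
 * @code
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                           pPdPoolPage->idx, iPde, &pShwPage);
 *     if (RT_SUCCESS(rc))    // VINF_SUCCESS or VINF_PGM_CACHED_PAGE
 *         pShwPde->u = pShwPage->Core.Key | X86_PDE_P | X86_PDE_RW | X86_PDE_US;
 * @endcode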
4210 */
4211int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
4212{
4213 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4214 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4215 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
4216 *ppPage = NULL;
4217 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4218 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4219 * Assert(!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4220
4221#ifdef PGMPOOL_WITH_CACHE
4222 if (pPool->fCacheEnabled)
4223 {
4224 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
4225 if (RT_SUCCESS(rc2))
4226 {
4227 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4228 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4229 return rc2;
4230 }
4231 }
4232#endif
4233
4234 /*
4235 * Allocate a new one.
4236 */
4237 int rc = VINF_SUCCESS;
4238 uint16_t iNew = pPool->iFreeHead;
4239 if (iNew == NIL_PGMPOOL_IDX)
4240 {
4241 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
4242 if (RT_FAILURE(rc))
4243 {
4244 if (rc != VERR_PGM_POOL_CLEARED)
4245 {
4246 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4247 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4248 return rc;
4249 }
4250 Log(("pgmPoolMakeMoreFreePages failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
4251 rc = VERR_PGM_POOL_FLUSHED;
4252 }
4253 iNew = pPool->iFreeHead;
4254 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4255 }
4256
4257 /* unlink the free head */
4258 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4259 pPool->iFreeHead = pPage->iNext;
4260 pPage->iNext = NIL_PGMPOOL_IDX;
4261
4262 /*
4263 * Initialize it.
4264 */
4265 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4266 pPage->enmKind = enmKind;
4267 pPage->GCPhys = GCPhys;
4268 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4269 pPage->fMonitored = false;
4270 pPage->fCached = false;
4271 pPage->fReusedFlushPending = false;
4272 pPage->fCR3Mix = false;
4273#ifdef PGMPOOL_WITH_MONITORING
4274 pPage->cModifications = 0;
4275 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4276 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4277#endif
4278#ifdef PGMPOOL_WITH_USER_TRACKING
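    /* No guest entries are shadowed yet; cPresent/iFirstPresent track the
       present PTEs so later scans can be limited to the populated part. */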
4279 pPage->cPresent = 0;
4280 pPage->iFirstPresent = ~0;
4281
4282 /*
4283 * Insert into the tracking and cache. If this fails, free the page.
4284 */
4285 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4286 if (RT_FAILURE(rc3))
4287 {
4288 if (rc3 != VERR_PGM_POOL_CLEARED)
4289 {
4290 pPool->cUsedPages--;
4291 pPage->enmKind = PGMPOOLKIND_FREE;
4292 pPage->GCPhys = NIL_RTGCPHYS;
4293 pPage->iNext = pPool->iFreeHead;
4294 pPool->iFreeHead = pPage->idx;
4295 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4296 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4297 return rc3;
4298 }
4299 Log(("pgmPoolTrackInsert failed with %Rrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4300 rc = VERR_PGM_POOL_FLUSHED;
4301 }
4302#endif /* PGMPOOL_WITH_USER_TRACKING */
4303
4304 /*
4305 * Commit the allocation, clear the page and return.
4306 */
4307#ifdef VBOX_WITH_STATISTICS
4308 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4309 pPool->cUsedPagesHigh = pPool->cUsedPages;
4310#endif
4311
4312 if (!pPage->fZeroed)
4313 {
4314 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4315 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4316 ASMMemZeroPage(pv);
4317 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4318 }
4319
4320 *ppPage = pPage;
4321 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4322 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4323 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4324 return rc;
4325}
4326
4327
4328/**
4329 * Frees a usage of a pool page.
4330 *
4331 * @param pVM The VM handle.
4332 * @param HCPhys The HC physical address of the shadow page.
4333 * @param iUser The shadow page pool index of the user table.
4334 * @param iUserTable The index into the user table (shadowed).
4335 */
4336void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4337{
4338 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4339 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4340 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4341}
4342
4343
4344/**
4345 * Gets an in-use page in the pool by its physical address.
4346 *
4347 * @returns Pointer to the page.
4348 * @param pVM The VM handle.
4349 * @param HCPhys The HC physical address of the shadow page.
4350 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4351 */
4352PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4353{
4354 /** @todo profile this! */
4355 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4356 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4357 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%RHp -> %p:{.idx=%d .GCPhys=%RGp .enmKind=%d}\n",
4358 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
4359 return pPage;
4360}
4361
4362
4363/**
4364 * Flushes the entire cache.
4365 *
4366 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
4367 * and will execute the CR3 flush.
4368 *
4369 * @param pVM The VM handle.
4370 */
4371void pgmPoolFlushAll(PVM pVM)
4372{
4373 LogFlow(("pgmPoolFlushAll:\n"));
4374 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4375}
4376