VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@39070

Last change on this file was r39070, checked in by vboxsync, 13 years ago

VMM,IPRT: -Wunused-function.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 208.1 KB
1/* $Id: PGMAllPool.cpp 39070 2011-10-21 09:41:18Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hwacc_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
51static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#ifndef IN_RING3
53DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
54#endif
55#ifdef LOG_ENABLED
56static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
57#endif
58#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
59static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
60#endif
61
62int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
63PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
64void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
65void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
66
67RT_C_DECLS_END
68
69
70/**
71 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
72 *
73 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
74 * @param enmKind The page kind.
75 */
76DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
77{
78 switch (enmKind)
79 {
80 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
83 return true;
84 default:
85 return false;
86 }
87}
88
89
90/**
91 * Flushes a chain of pages sharing the same access monitor.
92 *
93 * @returns VBox status code suitable for scheduling.
94 * @param pPool The pool.
95 * @param pPage A page in the chain.
96 * @todo VBOXSTRICTRC
97 */
98int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
99{
100 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
101
102 /*
103 * Find the list head.
104 */
105 uint16_t idx = pPage->idx;
106 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
109 {
110 idx = pPage->iMonitoredPrev;
111 Assert(idx != pPage->idx);
112 pPage = &pPool->aPages[idx];
113 }
114 }
115
116 /*
117 * Iterate the list flushing each shadow page.
118 */
119 int rc = VINF_SUCCESS;
120 for (;;)
121 {
122 idx = pPage->iMonitoredNext;
123 Assert(idx != pPage->idx);
124 if (pPage->idx >= PGMPOOL_IDX_FIRST)
125 {
126 int rc2 = pgmPoolFlushPage(pPool, pPage);
127 AssertRC(rc2);
128 }
129 /* next */
130 if (idx == NIL_PGMPOOL_IDX)
131 break;
132 pPage = &pPool->aPages[idx];
133 }
134 return rc;
135}
136
137
138/**
139 * Wrapper for getting the current context pointer to the entry being modified.
140 *
141 * @returns VBox status code suitable for scheduling.
142 * @param pVM VM Handle.
143 * @param pvDst Destination address
144 * @param pvSrc Source guest virtual address.
145 * @param GCPhysSrc The source guest physical address.
146 * @param cb Size of data to read
147 */
148DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
149{
150#if defined(IN_RING3)
151 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
152 return VINF_SUCCESS;
153#else
154 /** @todo In RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
155 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
156#endif
157}
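#if 0 /* Illustrative sketch (not part of the original file): the (pvSrc & ~(cb - 1)) masking
       * above rounds the source down to a cb-aligned boundary, so the whole guest entry is
       * fetched even when the faulting address points into the middle of it.  cb is always a
       * power of two here (4 or 8 bytes).  The values below are purely hypothetical. */
static void pgmPoolIllustrateEntryAlignment(void)
{
    uintptr_t const uFault = 0x00c03016;                      /* hypothetical fault address            */
    size_t    const cb     = sizeof(uint64_t);                /* reading one 8-byte PAE entry          */
    uintptr_t const uEntry = uFault & ~(uintptr_t)(cb - 1);   /* 0x00c03010: start of the whole entry  */
    NOREF(uEntry);
}
#endif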
158
159/**
160 * Process shadow entries before they are changed by the guest.
161 *
162 * For PT entries we will clear them. For PD entries, we'll simply check
163 * for mapping conflicts and set the SyncCR3 FF if found.
164 *
165 * @param pVCpu VMCPU handle
166 * @param pPool The pool.
167 * @param pPage The head page.
168 * @param GCPhysFault The guest physical fault address.
169 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
170 * In R3 this is the host context 'fault' address.
171 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries.
172 */
173void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
174{
175 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
176 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
177 PVM pVM = pPool->CTX_SUFF(pVM);
178
179 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
180
181 for (;;)
182 {
183 union
184 {
185 void *pv;
186 PX86PT pPT;
187 PPGMSHWPTPAE pPTPae;
188 PX86PD pPD;
189 PX86PDPAE pPDPae;
190 PX86PDPT pPDPT;
191 PX86PML4 pPML4;
192 } uShw;
193
194 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
195
196 uShw.pv = NULL;
197 switch (pPage->enmKind)
198 {
199 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
200 {
201 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
202 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
203 const unsigned iShw = off / sizeof(X86PTE);
204 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
205 if (uShw.pPT->a[iShw].n.u1Present)
206 {
207 X86PTE GstPte;
208
209 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
210 AssertRC(rc);
211 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
212 pgmPoolTracDerefGCPhysHint(pPool, pPage,
213 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
214 GstPte.u & X86_PTE_PG_MASK,
215 iShw);
216 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
217 }
218 break;
219 }
220
221 /* page/2 sized */
222 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
223 {
224 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
225 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
226 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
227 {
228 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
229 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
230 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
231 {
232 X86PTE GstPte;
233 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
234 AssertRC(rc);
235
236 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PG_MASK));
237 pgmPoolTracDerefGCPhysHint(pPool, pPage,
238 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
239 GstPte.u & X86_PTE_PG_MASK,
240 iShw);
241 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
242 }
243 }
244 break;
245 }
246
247 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
248 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
249 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
250 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
251 {
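 /* A 32-bit guest PD has 1024 entries (4 MB each); its shadow consists of four PAE PDs of
  * 512 entries (2 MB each), one per pool page of the PD0..PD3 kinds above.  Each guest PDE
  * therefore maps to two consecutive PAE PDEs: iShwPdpt below selects which of the four
  * shadow PDs is affected (iGst / 256) and iShw the first of the two shadow entries
  * ((iGst % 256) * 2). */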
252 unsigned iGst = off / sizeof(X86PDE);
253 unsigned iShwPdpt = iGst / 256;
254 unsigned iShw = (iGst % 256) * 2;
255 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
256
257 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
258 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
259 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
260 {
261 for (unsigned i = 0; i < 2; i++)
262 {
263# ifndef IN_RING0
264 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
265 {
266 Assert(pgmMapAreMappingsEnabled(pVM));
267 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
268 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
269 break;
270 }
271 else
272# endif /* !IN_RING0 */
273 if (uShw.pPDPae->a[iShw+i].n.u1Present)
274 {
275 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
276 pgmPoolFree(pVM,
277 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
278 pPage->idx,
279 iShw + i);
280 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
281 }
282
283 /* paranoia / a bit assumptive. */
284 if ( (off & 3)
285 && (off & 3) + cbWrite > 4)
286 {
287 const unsigned iShw2 = iShw + 2 + i;
288 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
289 {
290# ifndef IN_RING0
291 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
292 {
293 Assert(pgmMapAreMappingsEnabled(pVM));
294 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
295 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
296 break;
297 }
298 else
299# endif /* !IN_RING0 */
300 if (uShw.pPDPae->a[iShw2].n.u1Present)
301 {
302 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
303 pgmPoolFree(pVM,
304 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
305 pPage->idx,
306 iShw2);
307 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
308 }
309 }
310 }
311 }
312 }
313 break;
314 }
315
316 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
317 {
318 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
319 const unsigned iShw = off / sizeof(X86PTEPAE);
320 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
321 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
322 {
323 X86PTEPAE GstPte;
324 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
325 AssertRC(rc);
326
327 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
328 pgmPoolTracDerefGCPhysHint(pPool, pPage,
329 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
330 GstPte.u & X86_PTE_PAE_PG_MASK,
331 iShw);
332 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
333 }
334
335 /* paranoia / a bit assumptive. */
336 if ( (off & 7)
337 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
338 {
339 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
340 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
341
342 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
343 {
344 X86PTEPAE GstPte;
345# ifdef IN_RING3
346 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
347# else
348 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
349# endif
350 AssertRC(rc);
351 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
352 pgmPoolTracDerefGCPhysHint(pPool, pPage,
353 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
354 GstPte.u & X86_PTE_PAE_PG_MASK,
355 iShw2);
356 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
357 }
358 }
359 break;
360 }
361
362 case PGMPOOLKIND_32BIT_PD:
363 {
364 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
365 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
366
367 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
368 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
369# ifndef IN_RING0
370 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
371 {
372 Assert(pgmMapAreMappingsEnabled(pVM));
373 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
374 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
375 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
376 break;
377 }
378# endif /* !IN_RING0 */
379# ifndef IN_RING0
380 else
381# endif /* !IN_RING0 */
382 {
383 if (uShw.pPD->a[iShw].n.u1Present)
384 {
385 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
386 pgmPoolFree(pVM,
387 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
388 pPage->idx,
389 iShw);
390 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
391 }
392 }
393 /* paranoia / a bit assumptive. */
394 if ( (off & 3)
395 && (off & 3) + cbWrite > sizeof(X86PTE))
396 {
397 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
398 if ( iShw2 != iShw
399 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
400 {
401# ifndef IN_RING0
402 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
403 {
404 Assert(pgmMapAreMappingsEnabled(pVM));
405 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
406 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
407 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
408 break;
409 }
410# endif /* !IN_RING0 */
411# ifndef IN_RING0
412 else
413# endif /* !IN_RING0 */
414 {
415 if (uShw.pPD->a[iShw2].n.u1Present)
416 {
417 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
418 pgmPoolFree(pVM,
419 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
420 pPage->idx,
421 iShw2);
422 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
423 }
424 }
425 }
426 }
427#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
428 if ( uShw.pPD->a[iShw].n.u1Present
429 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
430 {
431 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
432# ifdef IN_RC /* TLB load - we're pushing things a bit... */
433 ASMProbeReadByte(pvAddress);
434# endif
435 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
436 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
437 }
438#endif
439 break;
440 }
441
442 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
443 {
444 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
445 const unsigned iShw = off / sizeof(X86PDEPAE);
446 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
447#ifndef IN_RING0
448 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
449 {
450 Assert(pgmMapAreMappingsEnabled(pVM));
451 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
452 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
453 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
454 break;
455 }
456#endif /* !IN_RING0 */
457 /*
458 * Causes trouble when the guest uses a PDE to refer to the whole page table level
459 * structure. (Invalidate here; faults later on when it tries to change the page
460 * table entries -> recheck; probably only applies to the RC case.)
461 */
462# ifndef IN_RING0
463 else
464# endif /* !IN_RING0 */
465 {
466 if (uShw.pPDPae->a[iShw].n.u1Present)
467 {
468 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
469 pgmPoolFree(pVM,
470 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
471 pPage->idx,
472 iShw);
473 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
474 }
475 }
476 /* paranoia / a bit assumptive. */
477 if ( (off & 7)
478 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
479 {
480 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
481 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
482
483#ifndef IN_RING0
484 if ( iShw2 != iShw
485 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
486 {
487 Assert(pgmMapAreMappingsEnabled(pVM));
488 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
489 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
490 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
491 break;
492 }
493#endif /* !IN_RING0 */
494# ifndef IN_RING0
495 else
496# endif /* !IN_RING0 */
497 if (uShw.pPDPae->a[iShw2].n.u1Present)
498 {
499 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
500 pgmPoolFree(pVM,
501 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
502 pPage->idx,
503 iShw2);
504 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
505 }
506 }
507 break;
508 }
509
510 case PGMPOOLKIND_PAE_PDPT:
511 {
512 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
513 /*
514 * Hopefully this doesn't happen very often:
515 * - touching unused parts of the page
516 * - messing with the bits of pd pointers without changing the physical address
517 */
518 /* PDPT roots are not page aligned; 32 byte only! */
519 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
520
521 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
522 const unsigned iShw = offPdpt / sizeof(X86PDPE);
523 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
524 {
525# ifndef IN_RING0
526 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
527 {
528 Assert(pgmMapAreMappingsEnabled(pVM));
529 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
530 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
531 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
532 break;
533 }
534# endif /* !IN_RING0 */
535# ifndef IN_RING0
536 else
537# endif /* !IN_RING0 */
538 if (uShw.pPDPT->a[iShw].n.u1Present)
539 {
540 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
541 pgmPoolFree(pVM,
542 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
543 pPage->idx,
544 iShw);
545 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
546 }
547
548 /* paranoia / a bit assumptive. */
549 if ( (offPdpt & 7)
550 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
551 {
552 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
553 if ( iShw2 != iShw
554 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
555 {
556# ifndef IN_RING0
557 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
558 {
559 Assert(pgmMapAreMappingsEnabled(pVM));
560 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
561 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
562 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
563 break;
564 }
565# endif /* !IN_RING0 */
566# ifndef IN_RING0
567 else
568# endif /* !IN_RING0 */
569 if (uShw.pPDPT->a[iShw2].n.u1Present)
570 {
571 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
572 pgmPoolFree(pVM,
573 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
574 pPage->idx,
575 iShw2);
576 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
577 }
578 }
579 }
580 }
581 break;
582 }
583
584#ifndef IN_RC
585 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
586 {
587 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
588 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
589 const unsigned iShw = off / sizeof(X86PDEPAE);
590 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
591 if (uShw.pPDPae->a[iShw].n.u1Present)
592 {
593 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
594 pgmPoolFree(pVM,
595 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
596 pPage->idx,
597 iShw);
598 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
599 }
600 /* paranoia / a bit assumptive. */
601 if ( (off & 7)
602 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
603 {
604 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
605 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
606
607 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
608 if (uShw.pPDPae->a[iShw2].n.u1Present)
609 {
610 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
611 pgmPoolFree(pVM,
612 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
613 pPage->idx,
614 iShw2);
615 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
616 }
617 }
618 break;
619 }
620
621 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
622 {
623 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
624 /*
625 * Hopefully this doesn't happen very often:
626 * - messing with the bits of pd pointers without changing the physical address
627 */
628 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
629 const unsigned iShw = off / sizeof(X86PDPE);
630 if (uShw.pPDPT->a[iShw].n.u1Present)
631 {
632 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
633 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
634 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
635 }
636 /* paranoia / a bit assumptive. */
637 if ( (off & 7)
638 && (off & 7) + cbWrite > sizeof(X86PDPE))
639 {
640 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
641 if (uShw.pPDPT->a[iShw2].n.u1Present)
642 {
643 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
644 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
645 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
646 }
647 }
648 break;
649 }
650
651 case PGMPOOLKIND_64BIT_PML4:
652 {
653 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
654 /*
655 * Hopefully this doesn't happen very often:
656 * - messing with the bits of pd pointers without changing the physical address
657 */
658 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
659 const unsigned iShw = off / sizeof(X86PDPE);
660 if (uShw.pPML4->a[iShw].n.u1Present)
661 {
662 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
663 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
664 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
665 }
666 /* paranoia / a bit assumptive. */
667 if ( (off & 7)
668 && (off & 7) + cbWrite > sizeof(X86PDPE))
669 {
670 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
671 if (uShw.pPML4->a[iShw2].n.u1Present)
672 {
673 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
674 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
675 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
676 }
677 }
678 break;
679 }
680#endif /* !IN_RC */
681
682 default:
683 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
684 }
685 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
686
687 /* next */
688 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
689 return;
690 pPage = &pPool->aPages[pPage->iMonitoredNext];
691 }
692}
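
#if 0 /* Illustrative sketch (not part of the original file): the recurring "paranoia / a bit
       * assumptive" pattern in the function above checks whether an unaligned write spills
       * into a second shadow entry.  With an entry size cbEntry (a power of two), a write
       * starting at offset off touches a second entry iff it does not start on an entry
       * boundary and runs past the end of the entry it starts in.  All names below are
       * hypothetical helpers, shown only to spell out the arithmetic. */
DECLINLINE(bool) pgmPoolIllustrateWriteCrossesEntry(unsigned off, unsigned cbWrite, unsigned cbEntry,
                                                    unsigned *piShw, unsigned *piShw2)
{
    *piShw = off / cbEntry;                               /* entry containing the first byte written */
    if (   (off & (cbEntry - 1))                          /* not entry aligned ...                   */
        && (off & (cbEntry - 1)) + cbWrite > cbEntry)     /* ... and extends past the entry end      */
    {
        *piShw2 = (off + cbWrite - 1) / cbEntry;          /* entry containing the last byte written  */
        return true;
    }
    *piShw2 = *piShw;
    return false;
}
#endif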
693
694# ifndef IN_RING3
695/**
696 * Checks if an access could be a fork operation in progress.
697 *
698 * Meaning that the guest is setting up the parent process for copy-on-write.
699 *
700 * @returns true if it's likely that we're forking, otherwise false.
701 * @param pPool The pool.
702 * @param pDis The disassembled instruction.
703 * @param offFault The access offset.
704 */
705DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
706{
707 /*
708 * i386 Linux uses btr to clear X86_PTE_RW.
709 * The functions involved are (2.6.16 source inspection):
710 * clear_bit
711 * ptep_set_wrprotect
712 * copy_one_pte
713 * copy_pte_range
714 * copy_pmd_range
715 * copy_pud_range
716 * copy_page_range
717 * dup_mmap
718 * dup_mm
719 * copy_mm
720 * copy_process
721 * do_fork
722 */
723 if ( pDis->pCurInstr->opcode == OP_BTR
724 && !(offFault & 4)
725 /** @todo Validate that the bit index is X86_PTE_RW. */
726 )
727 {
728 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
729 return true;
730 }
731 return false;
732}
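
/* Illustrative sketch (not part of the original file): the guest-side pattern the heuristic
 * above looks for.  During fork(), i386 Linux (2.6.16-era, per the call chain listed above)
 * write-protects the parent's PTEs with a locked bit-clear of the R/W bit (bit 1), roughly:
 *
 *     static inline void ptep_set_wrprotect(pte_t *ptep)
 *     {
 *         clear_bit(_PAGE_BIT_RW, &ptep->pte_low);   // compiles down to "lock btr [mem], 1"
 *     }
 *
 * For PAE page tables the R/W bit lives in the low dword of the 8-byte PTE, which appears to
 * be what the !(offFault & 4) test above is checking for. */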
733
734
735/**
736 * Determine whether the page is likely to have been reused.
737 *
738 * @returns true if we consider the page as being reused for a different purpose.
739 * @returns false if we consider it to still be a paging page.
740 * @param pVM VM Handle.
741 * @param pVCpu VMCPU Handle.
742 * @param pRegFrame Trap register frame.
743 * @param pDis The disassembly info for the faulting instruction.
744 * @param pvFault The fault address.
745 *
746 * @remark The REP prefix check is left to the caller because of STOSD/W.
747 */
748DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
749{
750#ifndef IN_RC
751 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
752 if ( HWACCMHasPendingIrq(pVM)
753 && (pRegFrame->rsp - pvFault) < 32)
754 {
755 /* Fault caused by stack writes while trying to inject an interrupt event. */
756 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
757 return true;
758 }
759#else
760 NOREF(pVM); NOREF(pvFault);
761#endif
762
763 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
764
765 /* Non-supervisor mode write means it's used for something else. */
766 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
767 return true;
768
769 switch (pDis->pCurInstr->opcode)
770 {
771 /* call implies the actual push of the return address faulted */
772 case OP_CALL:
773 Log4(("pgmPoolMonitorIsReused: CALL\n"));
774 return true;
775 case OP_PUSH:
776 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
777 return true;
778 case OP_PUSHF:
779 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
780 return true;
781 case OP_PUSHA:
782 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
783 return true;
784 case OP_FXSAVE:
785 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
786 return true;
787 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
788 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
789 return true;
790 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
791 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
792 return true;
793 case OP_MOVSWD:
794 case OP_STOSWD:
795 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
796 && pRegFrame->rcx >= 0x40
797 )
798 {
799 Assert(pDis->mode == CPUMODE_64BIT);
800
801 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
802 return true;
803 }
804 return false;
805 }
806 if ( ( (pDis->param1.flags & USE_REG_GEN32)
807 || (pDis->param1.flags & USE_REG_GEN64))
808 && (pDis->param1.base.reg_gen == USE_REG_ESP))
809 {
810 Log4(("pgmPoolMonitorIsReused: ESP\n"));
811 return true;
812 }
813
814 return false;
815}
816
817/**
818 * Flushes the page being accessed.
819 *
820 * @returns VBox status code suitable for scheduling.
821 * @param pVM The VM handle.
822 * @param pVCpu The VMCPU handle.
823 * @param pPool The pool.
824 * @param pPage The pool page (head).
825 * @param pDis The disassembly of the write instruction.
826 * @param pRegFrame The trap register frame.
827 * @param GCPhysFault The fault address as guest physical address.
828 * @param pvFault The fault address.
829 * @todo VBOXSTRICTRC
830 */
831static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
832 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
833{
834 /*
835 * First, do the flushing.
836 */
837 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
838
839 /*
840 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
841 * Must do this in raw mode (!); XP boot will fail otherwise.
842 */
843 uint32_t cbWritten;
844 VBOXSTRICTRC rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL, &cbWritten);
845 if (RT_SUCCESS(rc2))
846 {
847 pRegFrame->rip += pDis->opsize;
848 AssertMsg(rc2 == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
849 }
850 else if (rc2 == VERR_EM_INTERPRETER)
851 {
852#ifdef IN_RC
853 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
854 {
855 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
856 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
857 rc = VINF_SUCCESS;
858 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
859 }
860 else
861#endif
862 {
863 rc = VINF_EM_RAW_EMULATE_INSTR;
864 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
865 }
866 }
867 else
868 rc = VBOXSTRICTRC_VAL(rc2);
869
870 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
871 return rc;
872}
873
874/**
875 * Handles the STOSD write accesses.
876 *
877 * @returns VBox status code suitable for scheduling.
878 * @param pVM The VM handle.
879 * @param pPool The pool.
880 * @param pPage The pool page (head).
881 * @param pDis The disassembly of the write instruction.
882 * @param pRegFrame The trap register frame.
883 * @param GCPhysFault The fault address as guest physical address.
884 * @param pvFault The fault address.
885 */
886DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
887 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
888{
889 unsigned uIncrement = pDis->param1.size;
890
891 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
892 Assert(pRegFrame->rcx <= 0x20);
893
894#ifdef VBOX_STRICT
895 if (pDis->opmode == CPUMODE_32BIT)
896 Assert(uIncrement == 4);
897 else
898 Assert(uIncrement == 8);
899#endif
900
901 Log3(("pgmPoolAccessHandlerSTOSD\n"));
902
903 /*
904 * Increment the modification counter and insert it into the list
905 * of modified pages the first time.
906 */
907 if (!pPage->cModifications++)
908 pgmPoolMonitorModifiedInsert(pPool, pPage);
909
910 /*
911 * Execute REP STOSD.
912 *
913 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
914 * write situation, meaning that it's safe to write here.
915 */
916 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
917 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
918 while (pRegFrame->rcx)
919 {
920#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
921 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
922 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
923 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
924#else
925 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
926#endif
927#ifdef IN_RC
928 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
929#else
930 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
931#endif
932 pu32 += uIncrement;
933 GCPhysFault += uIncrement;
934 pRegFrame->rdi += uIncrement;
935 pRegFrame->rcx--;
936 }
937 pRegFrame->rip += pDis->opsize;
938
939 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
940 return VINF_SUCCESS;
941}
942
943
944/**
945 * Handles the simple write accesses.
946 *
947 * @returns VBox status code suitable for scheduling.
948 * @param pVM The VM handle.
949 * @param pVCpu The VMCPU handle.
950 * @param pPool The pool.
951 * @param pPage The pool page (head).
952 * @param pDis The disassembly of the write instruction.
953 * @param pRegFrame The trap register frame.
954 * @param GCPhysFault The fault address as guest physical address.
955 * @param pvFault The fault address.
956 * @param pfReused Reused state (out)
957 */
958DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
959 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
960{
961 Log3(("pgmPoolAccessHandlerSimple\n"));
962 /*
963 * Increment the modification counter and insert it into the list
964 * of modified pages the first time.
965 */
966 if (!pPage->cModifications++)
967 pgmPoolMonitorModifiedInsert(pPool, pPage);
968
969 /*
970 * Clear the affected entries in all pages of the monitor chain. ASSUMES that pvFault is readable.
971 */
972#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
973 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
974 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
975 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
976#else
977 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
978#endif
979
980 /*
981 * Interpret the instruction.
982 */
983 uint32_t cb;
984 VBOXSTRICTRC rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL, &cb);
985 if (RT_SUCCESS(rc))
986 {
987 pRegFrame->rip += pDis->opsize;
988 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
989 }
990 else if (rc == VERR_EM_INTERPRETER)
991 {
992 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
993 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
994 rc = VINF_EM_RAW_EMULATE_INSTR;
995 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
996 }
997
998#if 0 /* experimental code */
999 if (rc == VINF_SUCCESS)
1000 {
1001 switch (pPage->enmKind)
1002 {
1003 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1004 {
1005 X86PTEPAE GstPte;
1006 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1007 AssertRC(rc);
1008
1009 /* Check the new value written by the guest. If present and with a bogus physical address, then
1010 * it's fairly safe to assume the guest is reusing the PT.
1011 */
1012 if (GstPte.n.u1Present)
1013 {
1014 RTHCPHYS HCPhys = -1;
1015 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1016 if (rc != VINF_SUCCESS)
1017 {
1018 *pfReused = true;
1019 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1020 }
1021 }
1022 break;
1023 }
1024 }
1025 }
1026#endif
1027
1028 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", VBOXSTRICTRC_VAL(rc), cb));
1029 return VBOXSTRICTRC_VAL(rc);
1030}
1031
1032/**
1033 * \#PF Handler callback for PT write accesses.
1034 *
1035 * @returns VBox status code (appropriate for GC return).
1036 * @param pVM VM Handle.
1037 * @param uErrorCode CPU Error code.
1038 * @param pRegFrame Trap register frame.
1039 * NULL on DMA and other non CPU access.
1040 * @param pvFault The fault address (cr2).
1041 * @param GCPhysFault The GC physical address corresponding to pvFault.
1042 * @param pvUser User argument.
1043 */
1044DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1045{
1046 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1047 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1048 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1049 PVMCPU pVCpu = VMMGetCpu(pVM);
1050 unsigned cMaxModifications;
1051 bool fForcedFlush = false;
1052
1053 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1054
1055 pgmLock(pVM);
1056 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1057 {
1058 /* Pool page changed while we were waiting for the lock; ignore. */
1059 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1060 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1061 pgmUnlock(pVM);
1062 return VINF_SUCCESS;
1063 }
1064#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1065 if (pPage->fDirty)
1066 {
1067 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1068 pgmUnlock(pVM);
1069 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1070 }
1071#endif
1072
1073#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1074 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1075 {
1076 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1077 void *pvGst;
1078 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1079 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1080 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1081 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1082 }
1083#endif
1084
1085 /*
1086 * Disassemble the faulting instruction.
1087 */
1088 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1089 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1090 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1091 {
1092 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1093 pgmUnlock(pVM);
1094 return rc;
1095 }
1096
1097 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1098
1099 /*
1100 * We should ALWAYS have the list head as user parameter. This
1101 * is because we use that page to record the changes.
1102 */
1103 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1104
1105#ifdef IN_RING0
1106 /* Maximum nr of modifications depends on the page type. */
1107 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1108 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1109 cMaxModifications = 4;
1110 else
1111 cMaxModifications = 24;
1112#else
1113 cMaxModifications = 48;
1114#endif
1115
1116 /*
1117 * Incremental page table updates should weigh more than random ones.
1118 * (Only applies when started from offset 0)
1119 */
1120 pVCpu->pgm.s.cPoolAccessHandler++;
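 /* "Incremental" means the typical pattern of a guest initializing a page table: successive
  * writes to consecutive entries from one small code loop.  The check below looks for exactly
  * that: the faulting RIP is within +/-0x40 of the previous one, the fault address is exactly
  * one operand size past the previous fault, and no other pool access happened in between. */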
1121 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1122 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1123 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1124 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1125 {
1126 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1127 Assert(pPage->cModifications < 32000);
1128 pPage->cModifications = pPage->cModifications * 2;
1129 pPage->pvLastAccessHandlerFault = pvFault;
1130 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1131 if (pPage->cModifications >= cMaxModifications)
1132 {
1133 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1134 fForcedFlush = true;
1135 }
1136 }
1137
1138 if (pPage->cModifications >= cMaxModifications)
1139 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1140
1141 /*
1142 * Check if it's worth dealing with.
1143 */
1144 bool fReused = false;
1145 bool fNotReusedNotForking = false;
1146 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1147 || pgmPoolIsPageLocked(pPage)
1148 )
1149 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1150 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1151 {
1152 /*
1153 * Simple instructions, no REP prefix.
1154 */
1155 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1156 {
1157 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1158 if (fReused)
1159 goto flushPage;
1160
1161 /* A mov instruction to change the first page table entry will be remembered so we can detect
1162 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1163 */
1164 if ( rc == VINF_SUCCESS
1165 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1166 && pDis->pCurInstr->opcode == OP_MOV
1167 && (pvFault & PAGE_OFFSET_MASK) == 0)
1168 {
1169 pPage->pvLastAccessHandlerFault = pvFault;
1170 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1171 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1172 /* Make sure we don't kick out a page too quickly. */
1173 if (pPage->cModifications > 8)
1174 pPage->cModifications = 2;
1175 }
1176 else
1177 if (pPage->pvLastAccessHandlerFault == pvFault)
1178 {
1179 /* ignore the 2nd write to this page table entry. */
1180 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1181 }
1182 else
1183 {
1184 pPage->pvLastAccessHandlerFault = 0;
1185 pPage->pvLastAccessHandlerRip = 0;
1186 }
1187
1188 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1189 pgmUnlock(pVM);
1190 return rc;
1191 }
1192
1193 /*
1194 * Windows frequently does small memset() operations (netio test 4k+).
1195 * We have to deal with these or we'll kill the cache and performance.
1196 */
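 /* Only a small, forward (DF clear) REP STOS whose destination is naturally aligned, stays
  * within the faulting page and uses one of the two observed fill values is emulated in
  * place below; anything else is left to the generic handling further down. */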
1197 if ( pDis->pCurInstr->opcode == OP_STOSWD
1198 && !pRegFrame->eflags.Bits.u1DF
1199 && pDis->opmode == pDis->mode
1200 && pDis->addrmode == pDis->mode)
1201 {
1202 bool fValidStosd = false;
1203
1204 if ( pDis->mode == CPUMODE_32BIT
1205 && pDis->prefix == PREFIX_REP
1206 && pRegFrame->ecx <= 0x20
1207 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1208 && !((uintptr_t)pvFault & 3)
1209 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1210 )
1211 {
1212 fValidStosd = true;
1213 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1214 }
1215 else
1216 if ( pDis->mode == CPUMODE_64BIT
1217 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1218 && pRegFrame->rcx <= 0x20
1219 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1220 && !((uintptr_t)pvFault & 7)
1221 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1222 )
1223 {
1224 fValidStosd = true;
1225 }
1226
1227 if (fValidStosd)
1228 {
1229 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1230 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1231 pgmUnlock(pVM);
1232 return rc;
1233 }
1234 }
1235
1236 /* REP prefix, don't bother. */
1237 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1238 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1239 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1240 fNotReusedNotForking = true;
1241 }
1242
1243#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1244 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1245 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1246 */
1247 if ( pPage->cModifications >= cMaxModifications
1248 && !fForcedFlush
1249 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1250 && ( fNotReusedNotForking
1251 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1252 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1253 )
1254 )
1255 {
1256 Assert(!pgmPoolIsPageLocked(pPage));
1257 Assert(pPage->fDirty == false);
1258
1259 /* Flush any monitored duplicates as we will disable write protection. */
1260 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1261 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1262 {
1263 PPGMPOOLPAGE pPageHead = pPage;
1264
1265 /* Find the monitor head. */
1266 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1267 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1268
1269 while (pPageHead)
1270 {
1271 unsigned idxNext = pPageHead->iMonitoredNext;
1272
1273 if (pPageHead != pPage)
1274 {
1275 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1276 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1277 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1278 AssertRC(rc2);
1279 }
1280
1281 if (idxNext == NIL_PGMPOOL_IDX)
1282 break;
1283
1284 pPageHead = &pPool->aPages[idxNext];
1285 }
1286 }
1287
1288 /* The flushing above might fail for locked pages, so double check. */
1289 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1290 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1291 {
1292 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1293
1294 /* Temporarily allow write access to the page table again. */
1295 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1296 if (rc == VINF_SUCCESS)
1297 {
1298 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1299 AssertMsg(rc == VINF_SUCCESS
1300 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1301 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1302 || rc == VERR_PAGE_NOT_PRESENT,
1303 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1304
1305 pPage->pvDirtyFault = pvFault;
1306
1307 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1308 pgmUnlock(pVM);
1309 return rc;
1310 }
1311 }
1312 }
1313#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1314
1315 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1316flushPage:
1317 /*
1318 * Not worth it, so flush it.
1319 *
1320 * If we considered it to be reused, don't go back to ring-3
1321 * to emulate failed instructions since we usually cannot
1322 * interpret them. This may be a bit risky, in which case
1323 * the reuse detection must be fixed.
1324 */
1325 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1326 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1327 && fReused)
1328 {
1329 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1330 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1331 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1332 }
1333 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1334 pgmUnlock(pVM);
1335 return rc;
1336}
1337
1338# endif /* !IN_RING3 */
1339
1340# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1341
1342# if defined(VBOX_STRICT) && !defined(IN_RING3)
1343
1344/**
1345 * Check references to guest physical memory in a PAE / PAE page table.
1346 *
1347 * @param pPool The pool.
1348 * @param pPage The page.
1349 * @param pShwPT The shadow page table (mapping of the page).
1350 * @param pGstPT The guest page table.
1351 */
1352static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1353{
1354 unsigned cErrors = 0;
1355 int LastRc = -1; /* initialized to shut up gcc */
1356 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1357 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1358 PVM pVM = pPool->CTX_SUFF(pVM);
1359
1360#ifdef VBOX_STRICT
1361 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1362 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1363#endif
1364 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1365 {
1366 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1367 {
1368 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1369 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1370 if ( rc != VINF_SUCCESS
1371 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1372 {
1373 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1374 LastPTE = i;
1375 LastRc = rc;
1376 LastHCPhys = HCPhys;
1377 cErrors++;
1378
1379 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1380 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1381 AssertRC(rc);
1382
1383 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1384 {
1385 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1386
1387 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1388 {
1389 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1390
1391 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1392 {
1393 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1394 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1395 {
1396 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1397 }
1398 }
1399
1400 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1401 }
1402 }
1403 }
1404 }
1405 }
1406 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1407}
1408
1409/**
1410 * Check references to guest physical memory in a PAE / 32-bit page table.
1411 *
1412 * @param pPool The pool.
1413 * @param pPage The page.
1414 * @param pShwPT The shadow page table (mapping of the page).
1415 * @param pGstPT The guest page table.
1416 */
1417static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1418{
1419 unsigned cErrors = 0;
1420 int LastRc = -1; /* initialized to shut up gcc */
1421 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1422 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1423 PVM pVM = pPool->CTX_SUFF(pVM);
1424
1425#ifdef VBOX_STRICT
1426 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1427 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1428#endif
1429 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1430 {
1431 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1432 {
1433 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1434 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1435 if ( rc != VINF_SUCCESS
1436 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1437 {
1438 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1439 LastPTE = i;
1440 LastRc = rc;
1441 LastHCPhys = HCPhys;
1442 cErrors++;
1443
1444 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1445 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1446 AssertRC(rc);
1447
1448 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1449 {
1450 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1451
1452 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1453 {
1454 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1455
1456 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1457 {
1458 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1459 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1460 {
1461 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1462 }
1463 }
1464
1465 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1466 }
1467 }
1468 }
1469 }
1470 }
1471 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1472}
1473
1474# endif /* VBOX_STRICT && !IN_RING3 */
1475
1476/**
1477 * Clear references to guest physical memory in a PAE / PAE page table.
1478 *
1479 * @returns nr of changed PTEs
1480 * @param pPool The pool.
1481 * @param pPage The page.
1482 * @param pShwPT The shadow page table (mapping of the page).
1483 * @param pGstPT The guest page table.
1484 * @param pOldGstPT The old cached guest page table.
1485 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1486 * @param pfFlush Flush reused page table (out)
1487 */
1488DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1489 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1490{
1491 unsigned cChanged = 0;
1492
1493#ifdef VBOX_STRICT
1494 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1495 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1496#endif
1497 *pfFlush = false;
1498
1499 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1500 {
1501 /* Check the new value written by the guest. If present and with a bogus physical address, then
1502 * it's fairly safe to assume the guest is reusing the PT.
1503 */
1504 if ( fAllowRemoval
1505 && pGstPT->a[i].n.u1Present)
1506 {
1507 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1508 {
1509 *pfFlush = true;
1510 return ++cChanged;
1511 }
1512 }
1513 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1514 {
1515 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1516 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1517 {
1518#ifdef VBOX_STRICT
1519 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1520 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1521 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1522#endif
1523 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1524 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1525 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1526 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1527
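 /* The shadow PTE may legitimately be less permissive than the guest PTE (PGM write-protects
  * pages for access handlers and dirty tracking), so the entry is only kept if the remaining
  * attributes match and the shadow is not *more* writable than the guest (fHostRW <= fGuestRW). */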
1528 if ( uHostAttr == uGuestAttr
1529 && fHostRW <= fGuestRW)
1530 continue;
1531 }
1532 cChanged++;
1533 /* Something was changed, so flush it. */
1534 Log4(("pgmPoolTrackFlushPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1535 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1536 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1537 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1538 }
1539 }
1540 return cChanged;
1541}
1542
1543/**
1544 * Clear references to guest physical memory in a PAE / 32-bit page table.
1545 *
1546 * @returns nr of changed PTEs
1547 * @param pPool The pool.
1548 * @param pPage The page.
1549 * @param pShwPT The shadow page table (mapping of the page).
1550 * @param pGstPT The guest page table.
1551 * @param pOldGstPT The old cached guest page table.
1552 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1553 * @param pfFlush Flush reused page table (out)
1554 */
1555DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1556 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1557{
1558 unsigned cChanged = 0;
1559
1560#ifdef VBOX_STRICT
1561 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1562 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1563#endif
1564 *pfFlush = false;
1565
1566 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1567 {
1568 /* Check the new value written by the guest. If present and with a bogus physical address, then
1569 * it's fairly safe to assume the guest is reusing the PT.
1570 */
1571 if ( fAllowRemoval
1572 && pGstPT->a[i].n.u1Present)
1573 {
1574 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1575 {
1576 *pfFlush = true;
1577 return ++cChanged;
1578 }
1579 }
1580 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1581 {
1582 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1583 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1584 {
1585#ifdef VBOX_STRICT
1586 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1587 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1588 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1589#endif
1590 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1591 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1592 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1593 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1594
1595 if ( uHostAttr == uGuestAttr
1596 && fHostRW <= fGuestRW)
1597 continue;
1598 }
1599 cChanged++;
1600 /* Something was changed, so flush it. */
1601            Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1602 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1603 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1604 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1605 }
1606 }
1607 return cChanged;
1608}
1609
1610/**
1611 * Flush a dirty page
1612 *
1613 * @param pVM VM Handle.
1614 * @param pPool The pool.
1615 * @param idxSlot Dirty array slot index
1616 * @param fAllowRemoval Allow a reused page table to be removed
1617 */
1618static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1619{
1620 PPGMPOOLPAGE pPage;
1621 unsigned idxPage;
1622
1623 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1624 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1625 return;
1626
1627 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1628 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1629 pPage = &pPool->aPages[idxPage];
1630 Assert(pPage->idx == idxPage);
1631 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1632
1633 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1634 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1635
1636#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1637 PVMCPU pVCpu = VMMGetCpu(pVM);
1638 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1639#endif
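    /* Note: PGMRZDynMapPushAutoSubset/PopAutoSubset bracket this function in raw mode and
     * 32-bit ring-0, presumably so the guest/shadow PT mappings established below stay
     * valid in the small dynamic mapping cache until we are done with them. */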
1640
1641 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1642 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1643 Assert(rc == VINF_SUCCESS);
1644 pPage->fDirty = false;
1645
1646#ifdef VBOX_STRICT
1647 uint64_t fFlags = 0;
1648 RTHCPHYS HCPhys;
1649 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1650 AssertMsg( ( rc == VINF_SUCCESS
1651 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1652 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1653 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1654 || rc == VERR_PAGE_NOT_PRESENT,
1655 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1656#endif
1657
1658 /* Flush those PTEs that have changed. */
1659 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1660 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1661 void *pvGst;
1662 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1663 bool fFlush;
1664 unsigned cChanges;
1665
1666 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1667 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1668 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1669 else
1670 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1671 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1672
1673 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1674 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1675 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1676 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1677
1678 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1679 Assert(pPage->cModifications);
1680 if (cChanges < 4)
1681 pPage->cModifications = 1; /* must use > 0 here */
1682 else
1683 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1684
1685 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1686 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1687 pPool->idxFreeDirtyPage = idxSlot;
1688
1689 pPool->cDirtyPages--;
1690 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1691 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1692 if (fFlush)
1693 {
1694 Assert(fAllowRemoval);
1695 Log(("Flush reused page table!\n"));
1696 pgmPoolFlushPage(pPool, pPage);
1697 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1698 }
1699 else
1700 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1701
1702#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1703 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1704#endif
1705}
1706
1707# ifndef IN_RING3
1708/**
1709 * Add a new dirty page
1710 *
1711 * @param pVM VM Handle.
1712 * @param pPool The pool.
1713 * @param pPage The page.
1714 */
1715void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1716{
1717 unsigned idxFree;
1718
1719 PGM_LOCK_ASSERT_OWNER(pVM);
1720 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1721 Assert(!pPage->fDirty);
1722
1723 idxFree = pPool->idxFreeDirtyPage;
1724 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1725 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1726
1727 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1728 {
1729 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1730 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1731 }
1732 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1733 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1734
1735 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1736
1737 /*
1738 * Make a copy of the guest page table as we require valid GCPhys addresses
1739 * when removing references to physical pages.
1740 * (The HCPhys linear lookup is *extremely* expensive!)
1741 */
1742 void *pvGst;
1743 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
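    /* A PAE guest PT fills a whole page (512 entries of 8 bytes); a PAE shadow PT for a
     * 32-bit guest covers 512 4-byte guest entries, i.e. only half a page, hence the
     * PAGE_SIZE/2 copy size for that kind. */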
1744 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1745# ifdef VBOX_STRICT
1746 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1747 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1748 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1749 else
1750 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1751 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1752# endif
1753 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1754
1755 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1756 pPage->fDirty = true;
1757 pPage->idxDirty = idxFree;
1758 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1759 pPool->cDirtyPages++;
1760
1761 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
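    /* aDirtyPages has a power-of-two number of slots (see the AssertCompile above), so the
     * free-slot hint wraps with a simple mask; e.g. with 16 slots, (15 + 1) & 15 == 0.
     * If the hinted slot is still in use, the scan below moves the hint to a free one. */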
1762 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1763 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1764 {
1765 unsigned i;
1766 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1767 {
1768 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1769 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1770 {
1771 pPool->idxFreeDirtyPage = idxFree;
1772 break;
1773 }
1774 }
1775 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1776 }
1777
1778 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1779 return;
1780}
1781# endif /* !IN_RING3 */
1782
1783/**
1784 * Check if the specified page is dirty (not write monitored)
1785 *
1786 * @returns true if the page is dirty, false if not.
1787 * @param pVM VM Handle.
1788 * @param GCPhys Guest physical address
1789 */
1790bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1791{
1792 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1793 PGM_LOCK_ASSERT_OWNER(pVM);
1794 if (!pPool->cDirtyPages)
1795 return false;
1796
1797 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1798
1799 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1800 {
1801 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1802 {
1803 PPGMPOOLPAGE pPage;
1804 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1805
1806 pPage = &pPool->aPages[idxPage];
1807 if (pPage->GCPhys == GCPhys)
1808 return true;
1809 }
1810 }
1811 return false;
1812}
1813
1814/**
1815 * Reset all dirty pages by reinstating page monitoring.
1816 *
1817 * @param pVM VM Handle.
1818 */
1819void pgmPoolResetDirtyPages(PVM pVM)
1820{
1821 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1822 PGM_LOCK_ASSERT_OWNER(pVM);
1823 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1824
1825 if (!pPool->cDirtyPages)
1826 return;
1827
1828 Log(("pgmPoolResetDirtyPages\n"));
1829 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1830 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1831
1832 pPool->idxFreeDirtyPage = 0;
1833 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1834 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1835 {
1836 unsigned i;
1837 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1838 {
1839 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1840 {
1841 pPool->idxFreeDirtyPage = i;
1842 break;
1843 }
1844 }
1845 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1846 }
1847
1848 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1849 return;
1850}
1851
1852/**
1853 * Invalidate the PT entry for the specified page
1854 *
1855 * @param pVM VM Handle.
1856 * @param GCPtrPage Guest page to invalidate
1857 */
1858void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1859{
1860 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1861 PGM_LOCK_ASSERT_OWNER(pVM);
1862 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1863
1864 if (!pPool->cDirtyPages)
1865 return;
1866
1867 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
1868 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1869 {
1870 }
1871}
1872
1873/**
1874 * Flush the dirty state of the specified guest page table and reinstate its write monitoring.
1875 *
1876 * @param pVM VM Handle.
1877 * @param GCPhysPT Physical address of the page table
1878 */
1879void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1880{
1881 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1882 PGM_LOCK_ASSERT_OWNER(pVM);
1883 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1884 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1885
1886 if (!pPool->cDirtyPages)
1887 return;
1888
1889 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1890
1891 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1892 {
1893 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1894 {
1895 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1896
1897 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1898 if (pPage->GCPhys == GCPhysPT)
1899 {
1900 idxDirtyPage = i;
1901 break;
1902 }
1903 }
1904 }
1905
1906 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1907 {
1908 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1909 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1910 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1911 {
1912 unsigned i;
1913 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1914 {
1915 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1916 {
1917 pPool->idxFreeDirtyPage = i;
1918 break;
1919 }
1920 }
1921 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1922 }
1923 }
1924}
1925
1926# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1927
1928/**
1929 * Inserts a page into the GCPhys hash table.
1930 *
1931 * @param pPool The pool.
1932 * @param pPage The page.
1933 */
1934DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1935{
1936 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1937 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1938 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
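    /* Push the page onto the front of the hash bucket's singly linked chain (linked via iNext). */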
1939 pPage->iNext = pPool->aiHash[iHash];
1940 pPool->aiHash[iHash] = pPage->idx;
1941}
1942
1943
1944/**
1945 * Removes a page from the GCPhys hash table.
1946 *
1947 * @param pPool The pool.
1948 * @param pPage The page.
1949 */
1950DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1951{
1952 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1953 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1954 if (pPool->aiHash[iHash] == pPage->idx)
1955 pPool->aiHash[iHash] = pPage->iNext;
1956 else
1957 {
1958 uint16_t iPrev = pPool->aiHash[iHash];
1959 for (;;)
1960 {
1961 const int16_t i = pPool->aPages[iPrev].iNext;
1962 if (i == pPage->idx)
1963 {
1964 pPool->aPages[iPrev].iNext = pPage->iNext;
1965 break;
1966 }
1967 if (i == NIL_PGMPOOL_IDX)
1968 {
1969 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1970 break;
1971 }
1972 iPrev = i;
1973 }
1974 }
1975 pPage->iNext = NIL_PGMPOOL_IDX;
1976}
1977
1978
1979/**
1980 * Frees up one cache page.
1981 *
1982 * @returns VBox status code.
1983 * @retval VINF_SUCCESS on success.
1984 * @param pPool The pool.
1985 * @param iUser The user index.
1986 */
1987static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1988{
1989#ifndef IN_RC
1990 const PVM pVM = pPool->CTX_SUFF(pVM);
1991#endif
1992 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
1993 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1994
1995 /*
1996 * Select one page from the tail of the age list.
1997 */
1998 PPGMPOOLPAGE pPage;
1999 for (unsigned iLoop = 0; ; iLoop++)
2000 {
2001 uint16_t iToFree = pPool->iAgeTail;
2002 if (iToFree == iUser)
2003 iToFree = pPool->aPages[iToFree].iAgePrev;
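        /* Never evict the caller's own user table (iUser); take the next oldest page instead. */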
2004/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2005 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2006 {
2007 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2008 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2009 {
2010 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2011 continue;
2012 iToFree = i;
2013 break;
2014 }
2015 }
2016*/
2017 Assert(iToFree != iUser);
2018 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2019 pPage = &pPool->aPages[iToFree];
2020
2021 /*
2022 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2023 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2024 */
2025 if (!pgmPoolIsPageLocked(pPage))
2026 break;
2027 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2028 pgmPoolCacheUsed(pPool, pPage);
2029 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
2030 }
2031
2032 /*
2033 * Found a usable page, flush it and return.
2034 */
2035 int rc = pgmPoolFlushPage(pPool, pPage);
2036 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2037 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2038 if (rc == VINF_SUCCESS)
2039 PGM_INVL_ALL_VCPU_TLBS(pVM);
2040 return rc;
2041}
2042
2043
2044/**
2045 * Checks if a kind mismatch is really a page being reused
2046 * or if it's just a normal remapping.
2047 *
2048 * @returns true if reused and the cached page (enmKind1) should be flushed
2049 * @returns false if not reused.
2050 * @param enmKind1 The kind of the cached page.
2051 * @param enmKind2 The kind of the requested page.
2052 */
2053static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2054{
2055 switch (enmKind1)
2056 {
2057 /*
2058 * Never reuse them. There is no remapping in non-paging mode.
2059 */
2060 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2061 case PGMPOOLKIND_32BIT_PD_PHYS:
2062 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2063 case PGMPOOLKIND_PAE_PD_PHYS:
2064 case PGMPOOLKIND_PAE_PDPT_PHYS:
2065 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2066 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2067 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2068 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2069 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2070 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2071 return false;
2072
2073 /*
2074 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2075 */
2076 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2077 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2078 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2079 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2080 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2081 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2082 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2083 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2084 case PGMPOOLKIND_32BIT_PD:
2085 case PGMPOOLKIND_PAE_PDPT:
2086 switch (enmKind2)
2087 {
2088 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2089 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2090 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2091 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2092 case PGMPOOLKIND_64BIT_PML4:
2093 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2094 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2095 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2096 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2097 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2098 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2099 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2100 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2101 return true;
2102 default:
2103 return false;
2104 }
2105
2106 /*
2107 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2108 */
2109 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2110 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2111 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2112 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2113 case PGMPOOLKIND_64BIT_PML4:
2114 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2115 switch (enmKind2)
2116 {
2117 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2118 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2119 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2120 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2121 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2122 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2123 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2124 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2125 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2126 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2127 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2128 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2129 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2130 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2131 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2132 return true;
2133 default:
2134 return false;
2135 }
2136
2137 /*
2138 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2139 */
2140 case PGMPOOLKIND_ROOT_NESTED:
2141 return false;
2142
2143 default:
2144 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2145 }
2146}
2147
2148
2149/**
2150 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2151 *
2152 * @returns VBox status code.
2153 * @retval VINF_PGM_CACHED_PAGE on success.
2154 * @retval VERR_FILE_NOT_FOUND if not found.
2155 * @param pPool The pool.
2156 * @param GCPhys The GC physical address of the page we're gonna shadow.
2157 * @param enmKind The kind of mapping.
2158 * @param enmAccess Access type for the mapping (only relevant for big pages)
2159 * @param iUser The shadow page pool index of the user table.
2160 * @param iUserTable The index into the user table (shadowed).
2161 * @param ppPage Where to store the pointer to the page.
2162 */
2163static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2164{
2165 /*
2166 * Look up the GCPhys in the hash.
2167 */
2168 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2169 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2170 if (i != NIL_PGMPOOL_IDX)
2171 {
2172 do
2173 {
2174 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2175 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2176 if (pPage->GCPhys == GCPhys)
2177 {
2178 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2179 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2180 {
2181 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2182 * doesn't flush it in case there are no more free use records.
2183 */
2184 pgmPoolCacheUsed(pPool, pPage);
2185
2186 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2187 if (RT_SUCCESS(rc))
2188 {
2189 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2190 *ppPage = pPage;
2191 if (pPage->cModifications)
2192 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2193 STAM_COUNTER_INC(&pPool->StatCacheHits);
2194 return VINF_PGM_CACHED_PAGE;
2195 }
2196 return rc;
2197 }
2198
2199 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2200 {
2201 /*
2202 * The kind is different. In some cases we should now flush the page
2203 * as it has been reused, but in most cases this is normal remapping
2204 * of PDs as PT or big pages using the GCPhys field in a slightly
2205 * different way than the other kinds.
2206 */
2207 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2208 {
2209 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2210 pgmPoolFlushPage(pPool, pPage);
2211 break;
2212 }
2213 }
2214 }
2215
2216 /* next */
2217 i = pPage->iNext;
2218 } while (i != NIL_PGMPOOL_IDX);
2219 }
2220
2221 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2222 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2223 return VERR_FILE_NOT_FOUND;
2224}
2225
2226
2227/**
2228 * Inserts a page into the cache.
2229 *
2230 * @param pPool The pool.
2231 * @param pPage The cached page.
2232 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2233 */
2234static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2235{
2236 /*
2237 * Insert into the GCPhys hash if the page is fit for that.
2238 */
2239 Assert(!pPage->fCached);
2240 if (fCanBeCached)
2241 {
2242 pPage->fCached = true;
2243 pgmPoolHashInsert(pPool, pPage);
2244 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2245 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2246 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2247 }
2248 else
2249 {
2250 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2251 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2252 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2253 }
2254
2255 /*
2256 * Insert at the head of the age list.
2257 */
2258 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2259 pPage->iAgeNext = pPool->iAgeHead;
2260 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2261 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2262 else
2263 pPool->iAgeTail = pPage->idx;
2264 pPool->iAgeHead = pPage->idx;
2265}
2266
2267
2268/**
2269 * Flushes a cached page.
2270 *
2271 * @param pPool The pool.
2272 * @param pPage The cached page.
2273 */
2274static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2275{
2276 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2277
2278 /*
2279 * Remove the page from the hash.
2280 */
2281 if (pPage->fCached)
2282 {
2283 pPage->fCached = false;
2284 pgmPoolHashRemove(pPool, pPage);
2285 }
2286 else
2287 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2288
2289 /*
2290 * Remove it from the age list.
2291 */
2292 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2293 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2294 else
2295 pPool->iAgeTail = pPage->iAgePrev;
2296 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2297 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2298 else
2299 pPool->iAgeHead = pPage->iAgeNext;
2300 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2301 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2302}
2303
2304
2305/**
2306 * Looks for pages sharing the monitor.
2307 *
2308 * @returns Pointer to the head page.
2309 * @returns NULL if not found.
2310 * @param pPool The pool.
2311 * @param pNewPage The page which is going to be monitored.
2312 */
2313static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2314{
2315 /*
2316 * Look up the GCPhys in the hash.
2317 */
2318 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2319 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2320 if (i == NIL_PGMPOOL_IDX)
2321 return NULL;
2322 do
2323 {
2324 PPGMPOOLPAGE pPage = &pPool->aPages[i];
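        /* The GCPhys of a pool page may carry a sub-page offset (e.g. a PAE PT shadowing one
         * half of a 32-bit guest PT), so an unsigned 'difference < PAGE_SIZE' test is used to
         * match any page backed by the same 4 KB guest page. */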
2325 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2326 && pPage != pNewPage)
2327 {
2328 switch (pPage->enmKind)
2329 {
2330 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2331 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2332 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2333 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2334 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2335 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2336 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2337 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2338 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2339 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2340 case PGMPOOLKIND_64BIT_PML4:
2341 case PGMPOOLKIND_32BIT_PD:
2342 case PGMPOOLKIND_PAE_PDPT:
2343 {
2344 /* find the head */
2345 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2346 {
2347 Assert(pPage->iMonitoredPrev != pPage->idx);
2348 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2349 }
2350 return pPage;
2351 }
2352
2353 /* ignore, no monitoring. */
2354 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2355 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2356 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2357 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2358 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2359 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2360 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2361 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2362 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2363 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2364 case PGMPOOLKIND_ROOT_NESTED:
2365 case PGMPOOLKIND_PAE_PD_PHYS:
2366 case PGMPOOLKIND_PAE_PDPT_PHYS:
2367 case PGMPOOLKIND_32BIT_PD_PHYS:
2368 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2369 break;
2370 default:
2371 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2372 }
2373 }
2374
2375 /* next */
2376 i = pPage->iNext;
2377 } while (i != NIL_PGMPOOL_IDX);
2378 return NULL;
2379}
2380
2381
2382/**
2383 * Enables write monitoring of a guest page.
2384 *
2385 * @returns VBox status code.
2386 * @retval VINF_SUCCESS on success.
2387 * @param pPool The pool.
2388 * @param pPage The cached page.
2389 */
2390static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2391{
2392 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2393
2394 /*
2395 * Filter out the relevant kinds.
2396 */
2397 switch (pPage->enmKind)
2398 {
2399 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2400 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2401 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2402 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2403 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2404 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2405 case PGMPOOLKIND_64BIT_PML4:
2406 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2407 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2408 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2409 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2410 case PGMPOOLKIND_32BIT_PD:
2411 case PGMPOOLKIND_PAE_PDPT:
2412 break;
2413
2414 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2415 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2416 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2417 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2418 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2419 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2420 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2421 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2422 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2423 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2424 case PGMPOOLKIND_ROOT_NESTED:
2425 /* Nothing to monitor here. */
2426 return VINF_SUCCESS;
2427
2428 case PGMPOOLKIND_32BIT_PD_PHYS:
2429 case PGMPOOLKIND_PAE_PDPT_PHYS:
2430 case PGMPOOLKIND_PAE_PD_PHYS:
2431 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2432 /* Nothing to monitor here. */
2433 return VINF_SUCCESS;
2434 default:
2435 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2436 }
2437
2438 /*
2439 * Install handler.
2440 */
2441 int rc;
2442 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2443 if (pPageHead)
2444 {
2445 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2446 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2447
2448#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2449 if (pPageHead->fDirty)
2450 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2451#endif
2452
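        /* Link this page into the monitoring chain right after the head page; all pages
         * shadowing the same guest page then share the head's physical access handler. */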
2453 pPage->iMonitoredPrev = pPageHead->idx;
2454 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2455 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2456 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2457 pPageHead->iMonitoredNext = pPage->idx;
2458 rc = VINF_SUCCESS;
2459 }
2460 else
2461 {
2462 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2463 PVM pVM = pPool->CTX_SUFF(pVM);
2464 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2465 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2466 GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK,
2467 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2468 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2469 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2470 pPool->pszAccessHandler);
2471 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2472 * the heap size should suffice. */
2473 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2474 PVMCPU pVCpu = VMMGetCpu(pVM);
2475 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2476 }
2477 pPage->fMonitored = true;
2478 return rc;
2479}
2480
2481
2482/**
2483 * Disables write monitoring of a guest page.
2484 *
2485 * @returns VBox status code.
2486 * @retval VINF_SUCCESS on success.
2487 * @param pPool The pool.
2488 * @param pPage The cached page.
2489 */
2490static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2491{
2492 /*
2493 * Filter out the relevant kinds.
2494 */
2495 switch (pPage->enmKind)
2496 {
2497 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2498 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2499 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2500 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2501 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2502 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2503 case PGMPOOLKIND_64BIT_PML4:
2504 case PGMPOOLKIND_32BIT_PD:
2505 case PGMPOOLKIND_PAE_PDPT:
2506 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2507 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2508 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2509 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2510 break;
2511
2512 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2513 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2514 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2515 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2516 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2517 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2518 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2519 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2520 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2521 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2522 case PGMPOOLKIND_ROOT_NESTED:
2523 case PGMPOOLKIND_PAE_PD_PHYS:
2524 case PGMPOOLKIND_PAE_PDPT_PHYS:
2525 case PGMPOOLKIND_32BIT_PD_PHYS:
2526 /* Nothing to monitor here. */
2527 Assert(!pPage->fMonitored);
2528 return VINF_SUCCESS;
2529
2530 default:
2531 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2532 }
2533 Assert(pPage->fMonitored);
2534
2535 /*
2536 * Remove the page from the monitored list or uninstall it if last.
2537 */
2538 const PVM pVM = pPool->CTX_SUFF(pVM);
2539 int rc;
2540 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2541 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2542 {
2543 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2544 {
2545 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2546 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2547 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2548 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2549 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2550 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2551 pPool->pszAccessHandler);
2552 AssertFatalRCSuccess(rc);
2553 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2554 }
2555 else
2556 {
2557 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2558 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2559 {
2560 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2561 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2562 }
2563 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2564 rc = VINF_SUCCESS;
2565 }
2566 }
2567 else
2568 {
2569 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2570 AssertFatalRC(rc);
2571 PVMCPU pVCpu = VMMGetCpu(pVM);
2572 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2573 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2574 }
2575 pPage->fMonitored = false;
2576
2577 /*
2578 * Remove it from the list of modified pages (if in it).
2579 */
2580 pgmPoolMonitorModifiedRemove(pPool, pPage);
2581
2582 return rc;
2583}
2584
2585
2586/**
2587 * Inserts the page into the list of modified pages.
2588 *
2589 * @param pPool The pool.
2590 * @param pPage The page.
2591 */
2592void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2593{
2594 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2595 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2596 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2597 && pPool->iModifiedHead != pPage->idx,
2598 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2599 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2600 pPool->iModifiedHead, pPool->cModifiedPages));
2601
2602 pPage->iModifiedNext = pPool->iModifiedHead;
2603 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2604 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2605 pPool->iModifiedHead = pPage->idx;
2606 pPool->cModifiedPages++;
2607#ifdef VBOX_WITH_STATISTICS
2608 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2609 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2610#endif
2611}
2612
2613
2614/**
2615 * Removes the page from the list of modified pages and resets the
2616 * modification counter.
2617 *
2618 * @param pPool The pool.
2619 * @param pPage The page which is believed to be in the list of modified pages.
2620 */
2621static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2622{
2623 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2624 if (pPool->iModifiedHead == pPage->idx)
2625 {
2626 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2627 pPool->iModifiedHead = pPage->iModifiedNext;
2628 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2629 {
2630 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2631 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2632 }
2633 pPool->cModifiedPages--;
2634 }
2635 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2636 {
2637 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2638 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2639 {
2640 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2641 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2642 }
2643 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2644 pPool->cModifiedPages--;
2645 }
2646 else
2647 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2648 pPage->cModifications = 0;
2649}
2650
2651
2652/**
2653 * Zaps the list of modified pages, resetting their modification counters in the process.
2654 *
2655 * @param pVM The VM handle.
2656 */
2657static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2658{
2659 pgmLock(pVM);
2660 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2661 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2662
2663 unsigned cPages = 0; NOREF(cPages);
2664
2665#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2666 pgmPoolResetDirtyPages(pVM);
2667#endif
2668
2669 uint16_t idx = pPool->iModifiedHead;
2670 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2671 while (idx != NIL_PGMPOOL_IDX)
2672 {
2673 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2674 idx = pPage->iModifiedNext;
2675 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2676 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2677 pPage->cModifications = 0;
2678 Assert(++cPages);
2679 }
2680 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2681 pPool->cModifiedPages = 0;
2682 pgmUnlock(pVM);
2683}
2684
2685
2686/**
2687 * Handles SyncCR3 pool tasks.
2688 *
2689 * @returns VBox status code.
2690 * @retval VINF_SUCCESS on success.
2691 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2692 * @param pVCpu The VMCPU handle.
2693 * @remark Should only be used when monitoring is available, thus placed in
2694 * the PGMPOOL_WITH_MONITORING #ifdef.
2695 */
2696int pgmPoolSyncCR3(PVMCPU pVCpu)
2697{
2698 PVM pVM = pVCpu->CTX_SUFF(pVM);
2699 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2700
2701 /*
2702 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2703 * Occasionally we will have to clear all the shadow page tables because we wanted
2704 * to monitor a page which was mapped by too many shadowed page tables. This operation
2705 * sometimes referred to as a 'lightweight flush'.
2706 */
2707# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2708 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2709 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2710# else /* !IN_RING3 */
2711 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2712 {
2713 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2714 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2715
2716 /* Make sure all other VCPUs return to ring 3. */
2717 if (pVM->cCpus > 1)
2718 {
2719 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2720 PGM_INVL_ALL_VCPU_TLBS(pVM);
2721 }
2722 return VINF_PGM_SYNC_CR3;
2723 }
2724# endif /* !IN_RING3 */
2725 else
2726 {
2727 pgmPoolMonitorModifiedClearAll(pVM);
2728
2729 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2730 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2731 {
2732 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2733 return pgmPoolSyncCR3(pVCpu);
2734 }
2735 }
2736 return VINF_SUCCESS;
2737}
2738
2739
2740/**
2741 * Frees up at least one user entry.
2742 *
2743 * @returns VBox status code.
2744 * @retval VINF_SUCCESS if successfully freed.
2745 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2746 * @param pPool The pool.
2747 * @param iUser The user index.
2748 */
2749static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2750{
2751 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2752 /*
2753 * Just free cached pages in a braindead fashion.
2754 */
2755 /** @todo walk the age list backwards and free the first with usage. */
2756 int rc = VINF_SUCCESS;
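    /* Keep evicting cached pages until at least one user record has been returned to the
     * free list (flushing a page releases its user records). */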
2757 do
2758 {
2759 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2760 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2761 rc = rc2;
2762 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2763 return rc;
2764}
2765
2766
2767/**
2768 * Inserts a page into the cache.
2769 *
2770 * This will create a user node for the page, insert it into the GCPhys
2771 * hash, and insert it into the age list.
2772 *
2773 * @returns VBox status code.
2774 * @retval VINF_SUCCESS if successfully added.
2775 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2776 * @param pPool The pool.
2777 * @param pPage The cached page.
2778 * @param GCPhys The GC physical address of the page we're gonna shadow.
2779 * @param iUser The user index.
2780 * @param iUserTable The user table index.
2781 */
2782DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2783{
2784 int rc = VINF_SUCCESS;
2785 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2786
2787 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2788
2789#ifdef VBOX_STRICT
2790 /*
2791 * Check that the entry doesn't already exist.
2792 */
2793 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2794 {
2795 uint16_t i = pPage->iUserHead;
2796 do
2797 {
2798 Assert(i < pPool->cMaxUsers);
2799 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2800 i = paUsers[i].iNext;
2801 } while (i != NIL_PGMPOOL_USER_INDEX);
2802 }
2803#endif
2804
2805 /*
2806 * Find a free user node.
2807 */
2808 uint16_t i = pPool->iUserFreeHead;
2809 if (i == NIL_PGMPOOL_USER_INDEX)
2810 {
2811 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2812 if (RT_FAILURE(rc))
2813 return rc;
2814 i = pPool->iUserFreeHead;
2815 }
2816
2817 /*
2818 * Unlink the user node from the free list,
2819 * initialize and insert it into the user list.
2820 */
2821 pPool->iUserFreeHead = paUsers[i].iNext;
2822 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2823 paUsers[i].iUser = iUser;
2824 paUsers[i].iUserTable = iUserTable;
2825 pPage->iUserHead = i;
2826
2827 /*
2828 * Insert into cache and enable monitoring of the guest page if enabled.
2829 *
2830 * Until we implement caching of all levels, including the CR3 one, we'll
2831 * have to make sure we don't try monitor & cache any recursive reuse of
2832 * a monitored CR3 page. Because all Windows versions are doing this we'll
2833 * have to be able to do combined access monitoring, CR3 + PT and
2834 * PD + PT (guest PAE).
2835 *
2836 * Update:
2837 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2838 */
2839 const bool fCanBeMonitored = true;
2840 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2841 if (fCanBeMonitored)
2842 {
2843 rc = pgmPoolMonitorInsert(pPool, pPage);
2844 AssertRC(rc);
2845 }
2846 return rc;
2847}
2848
2849
2850/**
2851 * Adds a user reference to a page.
2852 *
2853 * This will move the page to the head of the age list.
2854 *
2855 * @returns VBox status code.
2856 * @retval VINF_SUCCESS if successfully added.
2857 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2858 * @param pPool The pool.
2859 * @param pPage The cached page.
2860 * @param iUser The user index.
2861 * @param iUserTable The user table.
2862 */
2863static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2864{
2865 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2866
2867 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2868
2869# ifdef VBOX_STRICT
2870 /*
2871 * Check that the entry doesn't already exist. We only allow multiple
2872 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2873 */
2874 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2875 {
2876 uint16_t i = pPage->iUserHead;
2877 do
2878 {
2879 Assert(i < pPool->cMaxUsers);
2880 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2881 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2882 i = paUsers[i].iNext;
2883 } while (i != NIL_PGMPOOL_USER_INDEX);
2884 }
2885# endif
2886
2887 /*
2888 * Allocate a user node.
2889 */
2890 uint16_t i = pPool->iUserFreeHead;
2891 if (i == NIL_PGMPOOL_USER_INDEX)
2892 {
2893 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2894 if (RT_FAILURE(rc))
2895 return rc;
2896 i = pPool->iUserFreeHead;
2897 }
2898 pPool->iUserFreeHead = paUsers[i].iNext;
2899
2900 /*
2901 * Initialize the user node and insert it.
2902 */
2903 paUsers[i].iNext = pPage->iUserHead;
2904 paUsers[i].iUser = iUser;
2905 paUsers[i].iUserTable = iUserTable;
2906 pPage->iUserHead = i;
2907
2908# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2909 if (pPage->fDirty)
2910 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2911# endif
2912
2913 /*
2914 * Tell the cache to update its replacement stats for this page.
2915 */
2916 pgmPoolCacheUsed(pPool, pPage);
2917 return VINF_SUCCESS;
2918}
2919
2920
2921/**
2922 * Frees a user record associated with a page.
2923 *
2924 * This does not clear the entry in the user table, it simply returns the
2925 * user record to the chain of free records.
2926 *
2927 * @param pPool The pool.
2928 * @param pPage The shadow page.
2929 * @param iUser The shadow page pool index of the user table.
2930 * @param iUserTable The index into the user table (shadowed).
2931 */
2932static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2933{
2934 /*
2935 * Unlink and free the specified user entry.
2936 */
2937 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2938
2939 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2940 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2941 uint16_t i = pPage->iUserHead;
2942 if ( i != NIL_PGMPOOL_USER_INDEX
2943 && paUsers[i].iUser == iUser
2944 && paUsers[i].iUserTable == iUserTable)
2945 {
2946 pPage->iUserHead = paUsers[i].iNext;
2947
2948 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2949 paUsers[i].iNext = pPool->iUserFreeHead;
2950 pPool->iUserFreeHead = i;
2951 return;
2952 }
2953
2954 /* General: Linear search. */
2955 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2956 while (i != NIL_PGMPOOL_USER_INDEX)
2957 {
2958 if ( paUsers[i].iUser == iUser
2959 && paUsers[i].iUserTable == iUserTable)
2960 {
2961 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2962 paUsers[iPrev].iNext = paUsers[i].iNext;
2963 else
2964 pPage->iUserHead = paUsers[i].iNext;
2965
2966 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2967 paUsers[i].iNext = pPool->iUserFreeHead;
2968 pPool->iUserFreeHead = i;
2969 return;
2970 }
2971 iPrev = i;
2972 i = paUsers[i].iNext;
2973 }
2974
2975 /* Fatal: didn't find it */
2976 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
2977 iUser, iUserTable, pPage->GCPhys));
2978}
2979
2980
2981/**
2982 * Gets the entry size of a shadow table.
2983 *
2984 * @param enmKind The kind of page.
2985 *
2986 * @returns The size of the entry in bytes. That is, 4 or 8.
2987 * @returns If the kind is not for a table, an assertion is raised and 0 is
2988 * returned.
2989 */
2990DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2991{
2992 switch (enmKind)
2993 {
2994 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2995 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2996 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2997 case PGMPOOLKIND_32BIT_PD:
2998 case PGMPOOLKIND_32BIT_PD_PHYS:
2999 return 4;
3000
3001 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3002 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3003 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3004 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3005 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3006 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3007 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3008 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3009 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3010 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3011 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3012 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3013 case PGMPOOLKIND_64BIT_PML4:
3014 case PGMPOOLKIND_PAE_PDPT:
3015 case PGMPOOLKIND_ROOT_NESTED:
3016 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3017 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3018 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3019 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3020 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3021 case PGMPOOLKIND_PAE_PD_PHYS:
3022 case PGMPOOLKIND_PAE_PDPT_PHYS:
3023 return 8;
3024
3025 default:
3026 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3027 }
3028}
3029
3030
3031/**
3032 * Gets the entry size of a guest table.
3033 *
3034 * @param enmKind The kind of page.
3035 *
3036 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3037 * @returns If the kind is not for a table, an assertion is raised and 0 is
3038 * returned.
3039 */
3040DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3041{
3042 switch (enmKind)
3043 {
3044 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3045 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3046 case PGMPOOLKIND_32BIT_PD:
3047 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3048 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3049 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3050 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3051 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3052 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3053 return 4;
3054
3055 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3056 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3057 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3058 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3059 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3060 case PGMPOOLKIND_64BIT_PML4:
3061 case PGMPOOLKIND_PAE_PDPT:
3062 return 8;
3063
3064 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3065 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3066 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3067 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3068 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3069 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3070 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3071 case PGMPOOLKIND_ROOT_NESTED:
3072 case PGMPOOLKIND_PAE_PD_PHYS:
3073 case PGMPOOLKIND_PAE_PDPT_PHYS:
3074 case PGMPOOLKIND_32BIT_PD_PHYS:
3075 /** @todo can we return 0? (nobody is calling this...) */
3076 AssertFailed();
3077 return 0;
3078
3079 default:
3080 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3081 }
3082}
3083
3084
3085/**
3086 * Checks one shadow page table entry for a mapping of a physical page.
3087 *
3088 * @returns true / false indicating removal of all relevant PTEs
3089 *
3090 * @param pVM The VM handle.
3091 * @param pPhysPage The guest page in question.
3092 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3093 * @param iShw The shadow page table.
3094 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3095 */
3096static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3097{
3098 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3099 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3100 bool fRet = false;
3101
3102 /*
3103 * Assert sanity.
3104 */
3105 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3106 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3107 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3108
3109 /*
3110 * Then, clear the actual mappings to the page in the shadow PT.
3111 */
3112 switch (pPage->enmKind)
3113 {
3114 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3115 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3116 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3117 {
3118 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
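            /* u32 is the pattern a shadow PTE must match to map pPhysPage: the page's host
             * physical address with the present bit set. */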
3119 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3120 uint32_t u32AndMask = 0;
3121 uint32_t u32OrMask = 0;
3122
3123 if (!fFlushPTEs)
3124 {
3125 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3126 {
3127 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3128 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3129 u32OrMask = X86_PTE_RW;
3130 u32AndMask = UINT32_MAX;
3131 fRet = true;
3132 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3133 break;
3134
3135 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3136 u32OrMask = 0;
3137 u32AndMask = ~X86_PTE_RW;
3138 fRet = true;
3139 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3140 break;
3141 default:
3142 /* (shouldn't be here, will assert below) */
3143 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3144 break;
3145 }
3146 }
3147 else
3148 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3149
3150 /* Update the counter if we're removing references. */
3151 if (!u32AndMask)
3152 {
3153 Assert(pPage->cPresent );
3154 Assert(pPool->cPresent);
3155 pPage->cPresent--;
3156 pPool->cPresent--;
3157 }
3158
3159 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3160 {
3161 X86PTE Pte;
3162
3163 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3164 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3165 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3166 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3167
3168 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3169 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3170 return fRet;
3171 }
3172#ifdef LOG_ENABLED
3173 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3174 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3175 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3176 {
3177 Log(("i=%d cFound=%d\n", i, ++cFound));
3178 }
3179#endif
3180 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3181 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3182 break;
3183 }
3184
3185 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3186 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3187 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3188 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3189 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3190 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3191 {
3192 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3193 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3194 uint64_t u64OrMask = 0;
3195 uint64_t u64AndMask = 0;
3196
3197 if (!fFlushPTEs)
3198 {
3199 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3200 {
3201 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3202 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3203 u64OrMask = X86_PTE_RW;
3204 u64AndMask = UINT64_MAX;
3205 fRet = true;
3206 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3207 break;
3208
3209 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3210 u64OrMask = 0;
3211 u64AndMask = ~(uint64_t)X86_PTE_RW;
3212 fRet = true;
3213 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3214 break;
3215
3216 default:
3217 /* (shouldn't be here, will assert below) */
3218 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3219 break;
3220 }
3221 }
3222 else
3223 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3224
3225 /* Update the counter if we're removing references. */
3226 if (!u64AndMask)
3227 {
3228 Assert(pPage->cPresent);
3229 Assert(pPool->cPresent);
3230 pPage->cPresent--;
3231 pPool->cPresent--;
3232 }
3233
3234 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3235 {
3236 X86PTEPAE Pte;
3237
3238 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3239 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3240 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3241 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3242
3243 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3244 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3245 return fRet;
3246 }
3247#ifdef LOG_ENABLED
3248 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3249 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3250 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3251 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3252 Log(("i=%d cFound=%d\n", i, ++cFound));
3253#endif
3254 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3255 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3256 break;
3257 }
3258
3259#ifdef PGM_WITH_LARGE_PAGES
3260 /* Large page case only. */
3261 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3262 {
3263 Assert(pVM->pgm.s.fNestedPaging);
3264
3265 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3266 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3267
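 /* A 2 MB page is mapped by a single PDE, so there is no PT level to patch;
    if the entry still maps the page we simply clear it and drop the reference. */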
3268 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3269 {
3270 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3271 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3272 pPD->a[iPte].u = 0;
3273 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3274
3275 /* Update the counter as we're removing references. */
3276 Assert(pPage->cPresent);
3277 Assert(pPool->cPresent);
3278 pPage->cPresent--;
3279 pPool->cPresent--;
3280
3281 return fRet;
3282 }
3283# ifdef LOG_ENABLED
3284 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3285 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3286 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3287 Log(("i=%d cFound=%d\n", i, ++cFound));
3288# endif
3289 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3290 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3291 break;
3292 }
3293
3294 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3295 case PGMPOOLKIND_PAE_PD_PHYS:
3296 {
3297 Assert(pVM->pgm.s.fNestedPaging);
3298
3299 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3300 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3301
3302 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3303 {
3304 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3305 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3306 pPD->a[iPte].u = 0;
3307 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3308
3309 /* Update the counter as we're removing references. */
3310 Assert(pPage->cPresent);
3311 Assert(pPool->cPresent);
3312 pPage->cPresent--;
3313 pPool->cPresent--;
3314 return fRet;
3315 }
3316# ifdef LOG_ENABLED
3317 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3318 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3319 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3320 Log(("i=%d cFound=%d\n", i, ++cFound));
3321# endif
3322 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3323 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3324 break;
3325 }
3326#endif /* PGM_WITH_LARGE_PAGES */
3327
3328 default:
3329 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3330 }
3331
3332 /* not reached. */
3333#ifndef _MSC_VER
3334 return fRet;
3335#endif
3336}
3337
3338
3339/**
3340 * Scans one shadow page table for mappings of a physical page.
3341 *
3342 * @param pVM The VM handle.
3343 * @param pPhysPage The guest page in question.
3344 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3345 * @param iShw The shadow page table.
3346 */
3347static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3348{
3349 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3350
3351 /* We should only come here when there's only one reference to this physical page. */
3352 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3353
3354 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3355 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3356 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
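 /* If no PTEs were kept the page has no shadow references left and the
    tracking data can be reset. */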
3357 if (!fKeptPTEs)
3358 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3359 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3360}
3361
3362
3363/**
3364 * Flushes a list of shadow page tables mapping the same physical page.
3365 *
3366 * @param pVM The VM handle.
3367 * @param pPhysPage The guest page in question.
3368 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3369 * @param iPhysExt The physical cross reference extent list to flush.
3370 */
3371static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3372{
3373 PGM_LOCK_ASSERT_OWNER(pVM);
3374 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3375 bool fKeepList = false;
3376
3377 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3378 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3379
3380 const uint16_t iPhysExtStart = iPhysExt;
3381 PPGMPOOLPHYSEXT pPhysExt;
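 /* Walk the extent chain and flush every referenced shadow PTE; if any entry
    is merely downgraded rather than removed, the list must be kept alive. */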
3382 do
3383 {
3384 Assert(iPhysExt < pPool->cMaxPhysExts);
3385 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3386 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3387 {
3388 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3389 {
3390 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3391 if (!fKeptPTEs)
3392 {
3393 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3394 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3395 }
3396 else
3397 fKeepList = true;
3398 }
3399 }
3400 /* next */
3401 iPhysExt = pPhysExt->iNext;
3402 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3403
3404 if (!fKeepList)
3405 {
3406 /* insert the list into the free list and clear the ram range entry. */
3407 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3408 pPool->iPhysExtFreeHead = iPhysExtStart;
3409 /* Invalidate the tracking data. */
3410 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3411 }
3412
3413 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3414}
3415
3416
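/*
 * Typical caller pattern (a sketch only, not lifted from any particular caller):
 *
 *      bool fFlushTLBs = false;
 *      int rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhys, pPage, true, &fFlushTLBs);
 *      if (fFlushTLBs)
 *          PGM_INVL_ALL_VCPU_TLBS(pVM);
 *
 * Note that pfFlushTLBs must be initialized to false by the caller, see below.
 */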
3417/**
3418 * Flushes all shadow page table mappings of the given guest page.
3419 *
3420 * This is typically called when the host page backing the guest one has been
3421 * replaced or when the page protection was changed due to a guest access
3422 * caught by the monitoring.
3423 *
3424 * @returns VBox status code.
3425 * @retval VINF_SUCCESS if all references have been successfully cleared.
3426 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3427 * pool cleaning. FF and sync flags are set.
3428 *
3429 * @param pVM The VM handle.
3430 * @param GCPhysPage GC physical address of the page in question
3431 * @param pPhysPage The guest page in question.
3432 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3433 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3434 * flushed, it is NOT touched if this isn't necessary.
3435 * The caller MUST initialize this to @a false.
3436 */
3437int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3438{
3439 PVMCPU pVCpu = VMMGetCpu(pVM);
3440 pgmLock(pVM);
3441 int rc = VINF_SUCCESS;
3442
3443#ifdef PGM_WITH_LARGE_PAGES
3444 /* Is this page part of a large page? */
3445 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3446 {
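 /* GCPhysBase is the 2 MB aligned start of the large page, GCPhysPage the
    4 KB aligned address of the page being changed; they only coincide for
    the first page of the range. */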
3447 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3448 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3449
3450 /* Fetch the large page base. */
3451 PPGMPAGE pLargePage;
3452 if (GCPhysBase != GCPhysPage)
3453 {
3454 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3455 AssertFatal(pLargePage);
3456 }
3457 else
3458 pLargePage = pPhysPage;
3459
3460 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3461
3462 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3463 {
3464 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3465 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3466 pVM->pgm.s.cLargePagesDisabled++;
3467
3468 /* Update the base as *only* that one has a reference and there's only one PDE to clear. */
3469 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3470
3471 *pfFlushTLBs = true;
3472 pgmUnlock(pVM);
3473 return rc;
3474 }
3475 }
3476#else
3477 NOREF(GCPhysPage);
3478#endif /* PGM_WITH_LARGE_PAGES */
3479
3480 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3481 if (u16)
3482 {
3483 /*
3484 * The zero page is currently screwing up the tracking and we'll
3485 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3486 * is defined, zero pages won't normally be mapped. Some kind of solution
3487 * will be needed for this problem of course, but it will have to wait...
3488 */
3489 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3490 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3491 rc = VINF_PGM_GCPHYS_ALIASED;
3492 else
3493 {
3494# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3495 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3496 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3497 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3498# endif
3499
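 /* Dispatch on the tracking word: a plain reference count (of one) means the
    index is the single shadow page table referencing this page, the PHYSEXT
    value means the index points to a chain of physical cross reference
    extents, and the overflowed marker means we have to do the slow scan. */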
3500 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3501 {
3502 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3503 pgmPoolTrackFlushGCPhysPT(pVM,
3504 pPhysPage,
3505 fFlushPTEs,
3506 PGMPOOL_TD_GET_IDX(u16));
3507 }
3508 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3509 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3510 else
3511 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3512 *pfFlushTLBs = true;
3513
3514# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3515 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3516# endif
3517 }
3518 }
3519
3520 if (rc == VINF_PGM_GCPHYS_ALIASED)
3521 {
3522 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3523 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3524 rc = VINF_PGM_SYNC_CR3;
3525 }
3526 pgmUnlock(pVM);
3527 return rc;
3528}
3529
3530
3531/**
3532 * Scans all shadow page tables for mappings of a physical page.
3533 *
3534 * This may be slow, but it's most likely more efficient than cleaning
3535 * out the entire page pool / cache.
3536 *
3537 * @returns VBox status code.
3538 * @retval VINF_SUCCESS if all references have been successfully cleared.
3539 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3540 * a page pool cleaning.
3541 *
3542 * @param pVM The VM handle.
3543 * @param pPhysPage The guest page in question.
3544 */
3545int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3546{
3547 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3548 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3549 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3550 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3551
3552 /*
3553 * There is a limit to what makes sense.
3554 */
3555 if ( pPool->cPresent > 1024
3556 && pVM->cCpus == 1)
3557 {
3558 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3559 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3560 return VINF_PGM_GCPHYS_ALIASED;
3561 }
3562
3563 /*
3564 * Iterate all the pages until we've encountered all those currently in use.
3565 * This is a simple but not quite optimal solution.
3566 */
3567 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3568 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3569 unsigned cLeft = pPool->cUsedPages;
3570 unsigned iPage = pPool->cCurPages;
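 /* Scan the pool from the top; cLeft lets the loop stop early once all
    in-use pages have been visited. */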
3571 while (--iPage >= PGMPOOL_IDX_FIRST)
3572 {
3573 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3574 if ( pPage->GCPhys != NIL_RTGCPHYS
3575 && pPage->cPresent)
3576 {
3577 switch (pPage->enmKind)
3578 {
3579 /*
3580 * We only care about shadow page tables.
3581 */
3582 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3583 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3584 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3585 {
3586 unsigned cPresent = pPage->cPresent;
3587 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3588 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3589 if (pPT->a[i].n.u1Present)
3590 {
3591 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3592 {
3593 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3594 pPT->a[i].u = 0;
3595
3596 /* Update the counter as we're removing references. */
3597 Assert(pPage->cPresent);
3598 Assert(pPool->cPresent);
3599 pPage->cPresent--;
3600 pPool->cPresent--;
3601 }
3602 if (!--cPresent)
3603 break;
3604 }
3605 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3606 break;
3607 }
3608
3609 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3610 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3611 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3612 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3613 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3614 {
3615 unsigned cPresent = pPage->cPresent;
3616 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3617 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3618 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3619 {
3620 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3621 {
3622 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3623 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3624
3625 /* Update the counter as we're removing references. */
3626 Assert(pPage->cPresent);
3627 Assert(pPool->cPresent);
3628 pPage->cPresent--;
3629 pPool->cPresent--;
3630 }
3631 if (!--cPresent)
3632 break;
3633 }
3634 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3635 break;
3636 }
3637#ifndef IN_RC
3638 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3639 {
3640 unsigned cPresent = pPage->cPresent;
3641 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3642 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3643 if (pPT->a[i].n.u1Present)
3644 {
3645 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3646 {
3647 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3648 pPT->a[i].u = 0;
3649
3650 /* Update the counter as we're removing references. */
3651 Assert(pPage->cPresent);
3652 Assert(pPool->cPresent);
3653 pPage->cPresent--;
3654 pPool->cPresent--;
3655 }
3656 if (!--cPresent)
3657 break;
3658 }
3659 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3660 break;
3661 }
3662#endif
3663 }
3664 if (!--cLeft)
3665 break;
3666 }
3667 }
3668
3669 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3670 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3671
3672 /*
3673 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3674 */
3675 if (pPool->cPresent > 1024)
3676 {
3677 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3678 return VINF_PGM_GCPHYS_ALIASED;
3679 }
3680
3681 return VINF_SUCCESS;
3682}
3683
3684
3685/**
3686 * Clears the user entry in a user table.
3687 *
3688 * This is used to remove all references to a page when flushing it.
3689 */
3690static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3691{
3692 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3693 Assert(pUser->iUser < pPool->cCurPages);
3694 uint32_t iUserTable = pUser->iUserTable;
3695
3696 /*
3697 * Map the user page.
3698 */
3699 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3700 union
3701 {
3702 uint64_t *pau64;
3703 uint32_t *pau32;
3704 } u;
3705 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
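 /* The user page holds either 32-bit or 64-bit entries depending on its kind;
    the union above lets us clear the entry with the proper width below. */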
3706
3707 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3708
3709 /* Safety precaution in case we change the paging for other modes too in the future. */
3710 Assert(!pgmPoolIsPageLocked(pPage));
3711
3712#ifdef VBOX_STRICT
3713 /*
3714 * Some sanity checks.
3715 */
3716 switch (pUserPage->enmKind)
3717 {
3718 case PGMPOOLKIND_32BIT_PD:
3719 case PGMPOOLKIND_32BIT_PD_PHYS:
3720 Assert(iUserTable < X86_PG_ENTRIES);
3721 break;
3722 case PGMPOOLKIND_PAE_PDPT:
3723 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3724 case PGMPOOLKIND_PAE_PDPT_PHYS:
3725 Assert(iUserTable < 4);
3726 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3727 break;
3728 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3729 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3730 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3731 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3732 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3733 case PGMPOOLKIND_PAE_PD_PHYS:
3734 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3735 break;
3736 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3737 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3738 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3739 break;
3740 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3741 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3742 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3743 break;
3744 case PGMPOOLKIND_64BIT_PML4:
3745 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3746 /* GCPhys >> PAGE_SHIFT is the index here */
3747 break;
3748 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3749 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3750 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3751 break;
3752
3753 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3754 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3755 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3756 break;
3757
3758 case PGMPOOLKIND_ROOT_NESTED:
3759 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3760 break;
3761
3762 default:
3763 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3764 break;
3765 }
3766#endif /* VBOX_STRICT */
3767
3768 /*
3769 * Clear the entry in the user page.
3770 */
3771 switch (pUserPage->enmKind)
3772 {
3773 /* 32-bit entries */
3774 case PGMPOOLKIND_32BIT_PD:
3775 case PGMPOOLKIND_32BIT_PD_PHYS:
3776 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3777 break;
3778
3779 /* 64-bit entries */
3780 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3781 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3782 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3783 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3784 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3785#ifdef IN_RC
3786 /*
3787 * In 32 bits PAE mode we *must* invalidate the TLB when changing a
3788 * PDPT entry; the CPU fetches them only during cr3 load, so any
3789 * non-present PDPT will continue to cause page faults.
3790 */
3791 ASMReloadCR3();
3792 /* no break */
3793#endif
3794 case PGMPOOLKIND_PAE_PD_PHYS:
3795 case PGMPOOLKIND_PAE_PDPT_PHYS:
3796 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3797 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3798 case PGMPOOLKIND_64BIT_PML4:
3799 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3800 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3801 case PGMPOOLKIND_PAE_PDPT:
3802 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3803 case PGMPOOLKIND_ROOT_NESTED:
3804 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3805 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3806 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3807 break;
3808
3809 default:
3810 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3811 }
3812 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3813}
3814
3815
3816/**
3817 * Clears all users of a page.
3818 */
3819static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3820{
3821 /*
3822 * Free all the user records.
3823 */
3824 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3825
3826 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3827 uint16_t i = pPage->iUserHead;
3828 while (i != NIL_PGMPOOL_USER_INDEX)
3829 {
3830 /* Clear entry in user table. */
3831 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3832
3833 /* Free it. */
3834 const uint16_t iNext = paUsers[i].iNext;
3835 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3836 paUsers[i].iNext = pPool->iUserFreeHead;
3837 pPool->iUserFreeHead = i;
3838
3839 /* Next. */
3840 i = iNext;
3841 }
3842 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3843}
3844
3845
3846/**
3847 * Allocates a new physical cross reference extent.
3848 *
3849 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3850 * @param pVM The VM handle.
3851 * @param piPhysExt Where to store the phys ext index.
3852 */
3853PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3854{
3855 PGM_LOCK_ASSERT_OWNER(pVM);
3856 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3857 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3858 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3859 {
3860 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3861 return NULL;
3862 }
3863 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3864 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3865 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3866 *piPhysExt = iPhysExt;
3867 return pPhysExt;
3868}
3869
3870
3871/**
3872 * Frees a physical cross reference extent.
3873 *
3874 * @param pVM The VM handle.
3875 * @param iPhysExt The extent to free.
3876 */
3877void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3878{
3879 PGM_LOCK_ASSERT_OWNER(pVM);
3880 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3881 Assert(iPhysExt < pPool->cMaxPhysExts);
3882 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3883 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3884 {
3885 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3886 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3887 }
3888 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3889 pPool->iPhysExtFreeHead = iPhysExt;
3890}
3891
3892
3893/**
3894 * Frees a whole list of physical cross reference extents.
3895 *
3896 * @param pVM The VM handle.
3897 * @param iPhysExt The index of the first extent in the list to free.
3898 */
3899void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3900{
3901 PGM_LOCK_ASSERT_OWNER(pVM);
3902 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3903
3904 const uint16_t iPhysExtStart = iPhysExt;
3905 PPGMPOOLPHYSEXT pPhysExt;
3906 do
3907 {
3908 Assert(iPhysExt < pPool->cMaxPhysExts);
3909 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3910 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3911 {
3912 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3913 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3914 }
3915
3916 /* next */
3917 iPhysExt = pPhysExt->iNext;
3918 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3919
3920 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3921 pPool->iPhysExtFreeHead = iPhysExtStart;
3922}
3923
3924
3925/**
3926 * Insert a reference into a list of physical cross reference extents.
3927 *
3928 * @returns The new tracking data for PGMPAGE.
3929 *
3930 * @param pVM The VM handle.
3931 * @param iPhysExt The physical extent index of the list head.
3932 * @param iShwPT The shadow page table index.
3933 * @param iPte Page table entry
3934 *
3935 */
3936static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3937{
3938 PGM_LOCK_ASSERT_OWNER(pVM);
3939 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3940 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3941
3942 /*
3943 * Special common cases.
3944 */
3945 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3946 {
3947 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3948 paPhysExts[iPhysExt].apte[1] = iPte;
3949 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3950 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3951 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3952 }
3953 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3954 {
3955 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3956 paPhysExts[iPhysExt].apte[2] = iPte;
3957 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3958 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3959 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3960 }
3961 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3962
3963 /*
3964 * General treatment.
3965 */
3966 const uint16_t iPhysExtStart = iPhysExt;
3967 unsigned cMax = 15;
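 /* Scan at most cMax extents for a free slot; if the chain gets too long we
    give up and mark the page as overflowed so the slow path handles it later. */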
3968 for (;;)
3969 {
3970 Assert(iPhysExt < pPool->cMaxPhysExts);
3971 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3972 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3973 {
3974 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3975 paPhysExts[iPhysExt].apte[i] = iPte;
3976 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3977 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3978 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3979 }
3980 if (!--cMax)
3981 {
3982 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
3983 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3984 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3985 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3986 }
3987
3988 /* advance */
3989 iPhysExt = paPhysExts[iPhysExt].iNext;
3990 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3991 break;
3992 }
3993
3994 /*
3995 * Add another extent to the list.
3996 */
3997 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3998 if (!pNew)
3999 {
4000 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4001 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4002 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4003 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4004 }
4005 pNew->iNext = iPhysExtStart;
4006 pNew->aidx[0] = iShwPT;
4007 pNew->apte[0] = iPte;
4008 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4009 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4010}
4011
4012
4013/**
4014 * Add a reference to guest physical page where extents are in use.
4015 *
4016 * @returns The new tracking data for PGMPAGE.
4017 *
4018 * @param pVM The VM handle.
4019 * @param pPhysPage Pointer to the aPages entry in the ram range.
4020 * @param u16 The ram range flags (top 16-bits).
4021 * @param iShwPT The shadow page table index.
4022 * @param iPte Page table entry
4023 */
4024uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4025{
4026 pgmLock(pVM);
4027 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4028 {
4029 /*
4030 * Convert to extent list.
4031 */
4032 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4033 uint16_t iPhysExt;
4034 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4035 if (pPhysExt)
4036 {
4037 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4038 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4039 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4040 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4041 pPhysExt->aidx[1] = iShwPT;
4042 pPhysExt->apte[1] = iPte;
4043 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4044 }
4045 else
4046 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4047 }
4048 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4049 {
4050 /*
4051 * Insert into the extent list.
4052 */
4053 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4054 }
4055 else
4056 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4057 pgmUnlock(pVM);
4058 return u16;
4059}
4060
4061
4062/**
4063 * Clear references to guest physical memory.
4064 *
4065 * @param pPool The pool.
4066 * @param pPage The page.
4067 * @param pPhysPage Pointer to the aPages entry in the ram range.
4068 * @param iPte Shadow PTE index
4069 */
4070void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4071{
4072 PVM pVM = pPool->CTX_SUFF(pVM);
4073 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4074 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4075
4076 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4077 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4078 {
4079 pgmLock(pVM);
4080
4081 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4082 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4083 do
4084 {
4085 Assert(iPhysExt < pPool->cMaxPhysExts);
4086
4087 /*
4088 * Look for the shadow page and check if it's all freed.
4089 */
4090 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4091 {
4092 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4093 && paPhysExts[iPhysExt].apte[i] == iPte)
4094 {
4095 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4096 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4097
4098 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4099 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4100 {
4101 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4102 pgmUnlock(pVM);
4103 return;
4104 }
4105
4106 /* we can free the node. */
4107 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4108 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4109 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4110 {
4111 /* lonely node */
4112 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4113 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4114 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4115 }
4116 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4117 {
4118 /* head */
4119 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4120 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4121 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4122 }
4123 else
4124 {
4125 /* in list */
4126 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4127 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4128 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4129 }
4130 iPhysExt = iPhysExtNext;
4131 pgmUnlock(pVM);
4132 return;
4133 }
4134 }
4135
4136 /* next */
4137 iPhysExtPrev = iPhysExt;
4138 iPhysExt = paPhysExts[iPhysExt].iNext;
4139 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4140
4141 pgmUnlock(pVM);
4142 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4143 }
4144 else /* nothing to do */
4145 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4146}
4147
4148/**
4149 * Clear references to guest physical memory.
4150 *
4151 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4152 * physical address is assumed to be correct, so the linear search can be
4153 * skipped and we can assert at an earlier point.
4154 *
4155 * @param pPool The pool.
4156 * @param pPage The page.
4157 * @param HCPhys The host physical address corresponding to the guest page.
4158 * @param GCPhys The guest physical address corresponding to HCPhys.
4159 * @param iPte Shadow PTE index
4160 */
4161static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4162{
4163 /*
4164 * Lookup the page and check if it checks out before derefing it.
4165 */
4166 PVM pVM = pPool->CTX_SUFF(pVM);
4167 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4168 if (pPhysPage)
4169 {
4170 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4171#ifdef LOG_ENABLED
4172 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4173 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4174#endif
4175 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4176 {
4177 Assert(pPage->cPresent);
4178 Assert(pPool->cPresent);
4179 pPage->cPresent--;
4180 pPool->cPresent--;
4181 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4182 return;
4183 }
4184
4185 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4186 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4187 }
4188 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4189}
4190
4191
4192/**
4193 * Clear references to guest physical memory.
4194 *
4195 * @param pPool The pool.
4196 * @param pPage The page.
4197 * @param HCPhys The host physical address corresponding to the guest page.
4198 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4199 * @param iPte Shadow pte index
4200 */
4201void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4202{
4203 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4204
4205 /*
4206 * Try the hint first.
4207 */
4208 RTHCPHYS HCPhysHinted;
4209 PVM pVM = pPool->CTX_SUFF(pVM);
4210 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4211 if (pPhysPage)
4212 {
4213 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4214 Assert(HCPhysHinted);
4215 if (HCPhysHinted == HCPhys)
4216 {
4217 Assert(pPage->cPresent);
4218 Assert(pPool->cPresent);
4219 pPage->cPresent--;
4220 pPool->cPresent--;
4221 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4222 return;
4223 }
4224 }
4225 else
4226 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4227
4228 /*
4229 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4230 */
4231 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4232 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4233 while (pRam)
4234 {
4235 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4236 while (iPage-- > 0)
4237 {
4238 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4239 {
4240 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4241 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4242 Assert(pPage->cPresent);
4243 Assert(pPool->cPresent);
4244 pPage->cPresent--;
4245 pPool->cPresent--;
4246 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4247 return;
4248 }
4249 }
4250 pRam = pRam->CTX_SUFF(pNext);
4251 }
4252
4253 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4254}
4255
4256
4257/**
4258 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4259 *
4260 * @param pPool The pool.
4261 * @param pPage The page.
4262 * @param pShwPT The shadow page table (mapping of the page).
4263 * @param pGstPT The guest page table.
4264 */
4265DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4266{
4267 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4268 {
4269 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4270 if (pShwPT->a[i].n.u1Present)
4271 {
4272 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4273 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4274 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4275 if (!pPage->cPresent)
4276 break;
4277 }
4278 }
4279}
4280
4281
4282/**
4283 * Clear references to guest physical memory in a PAE / 32-bit page table.
4284 *
4285 * @param pPool The pool.
4286 * @param pPage The page.
4287 * @param pShwPT The shadow page table (mapping of the page).
4288 * @param pGstPT The guest page table (just a half one).
4289 */
4290DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4291{
4292 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4293 {
4294 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4295 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4296 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4297 {
4298 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4299 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4300 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4301 if (!pPage->cPresent)
4302 break;
4303 }
4304 }
4305}
4306
4307
4308/**
4309 * Clear references to guest physical memory in a PAE / PAE page table.
4310 *
4311 * @param pPool The pool.
4312 * @param pPage The page.
4313 * @param pShwPT The shadow page table (mapping of the page).
4314 * @param pGstPT The guest page table.
4315 */
4316DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4317{
4318 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4319 {
4320 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4321 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4322 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4323 {
4324 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4325 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4326 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
4327 if (!pPage->cPresent)
4328 break;
4329 }
4330 }
4331}
4332
4333
4334/**
4335 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4336 *
4337 * @param pPool The pool.
4338 * @param pPage The page.
4339 * @param pShwPT The shadow page table (mapping of the page).
4340 */
4341DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4342{
4343 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4344 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4345 {
4346 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4347 if (pShwPT->a[i].n.u1Present)
4348 {
4349 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4350 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4351 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys, i);
4352 if (!pPage->cPresent)
4353 break;
4354 }
4355 }
4356}
4357
4358
4359/**
4360 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4361 *
4362 * @param pPool The pool.
4363 * @param pPage The page.
4364 * @param pShwPT The shadow page table (mapping of the page).
4365 */
4366DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4367{
4368 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4369 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4370 {
4371 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4372 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4373 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4374 {
4375 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4376 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4377 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys, i);
4378 if (!pPage->cPresent)
4379 break;
4380 }
4381 }
4382}
4383
4384
4385/**
4386 * Clear references to shadowed pages in an EPT page table.
4387 *
4388 * @param pPool The pool.
4389 * @param pPage The page.
4390 * @param pShwPT The shadow page table (mapping of the page).
4391 */
4392DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4393{
4394 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4395 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4396 {
4397 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4398 if (pShwPT->a[i].n.u1Present)
4399 {
4400 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4401 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4402 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys, i);
4403 if (!pPage->cPresent)
4404 break;
4405 }
4406 }
4407}
4408
4409
4410
4411/**
4412 * Clear references to shadowed pages in a 32-bit page directory.
4413 *
4414 * @param pPool The pool.
4415 * @param pPage The page.
4416 * @param pShwPD The shadow page directory (mapping of the page).
4417 */
4418DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4419{
4420 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4421 {
4422 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4423 if ( pShwPD->a[i].n.u1Present
4424 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4425 )
4426 {
4427 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4428 if (pSubPage)
4429 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4430 else
4431 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4432 }
4433 }
4434}
4435
4436/**
4437 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4438 *
4439 * @param pPool The pool.
4440 * @param pPage The page.
4441 * @param pShwPD The shadow page directory (mapping of the page).
4442 */
4443DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4444{
4445 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4446 {
4447 if ( pShwPD->a[i].n.u1Present
4448 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4449 {
4450#ifdef PGM_WITH_LARGE_PAGES
4451 if (pShwPD->a[i].b.u1Size)
4452 {
4453 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4454 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4455 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */, i);
4456 }
4457 else
4458#endif
4459 {
4460 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4461 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4462 if (pSubPage)
4463 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4464 else
4465 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4466 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4467 }
4468 }
4469 }
4470}
4471
4472/**
4473 * Clear references to shadowed pages in a PAE page directory pointer table.
4474 *
4475 * @param pPool The pool.
4476 * @param pPage The page.
4477 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4478 */
4479DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4480{
4481 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4482 {
4483 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4484 if ( pShwPDPT->a[i].n.u1Present
4485 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4486 )
4487 {
4488 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4489 if (pSubPage)
4490 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4491 else
4492 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4493 }
4494 }
4495}
4496
4497
4498/**
4499 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4500 *
4501 * @param pPool The pool.
4502 * @param pPage The page.
4503 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4504 */
4505DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4506{
4507 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4508 {
4509 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4510 if (pShwPDPT->a[i].n.u1Present)
4511 {
4512 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4513 if (pSubPage)
4514 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4515 else
4516 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4517 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4518 }
4519 }
4520}
4521
4522
4523/**
4524 * Clear references to shadowed pages in a 64-bit level 4 page table.
4525 *
4526 * @param pPool The pool.
4527 * @param pPage The page.
4528 * @param pShwPML4 The shadow page map level 4 table (mapping of the page).
4529 */
4530DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4531{
4532 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4533 {
4534 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4535 if (pShwPML4->a[i].n.u1Present)
4536 {
4537 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4538 if (pSubPage)
4539 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4540 else
4541 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4542 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4543 }
4544 }
4545}
4546
4547
4548/**
4549 * Clear references to shadowed pages in an EPT page directory.
4550 *
4551 * @param pPool The pool.
4552 * @param pPage The page.
4553 * @param pShwPD The shadow page directory (mapping of the page).
4554 */
4555DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4556{
4557 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4558 {
4559 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4560 if (pShwPD->a[i].n.u1Present)
4561 {
4562#ifdef PGM_WITH_LARGE_PAGES
4563 if (pShwPD->a[i].b.u1Size)
4564 {
4565 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4566 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4567 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */, i);
4568 }
4569 else
4570#endif
4571 {
4572 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4573 if (pSubPage)
4574 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4575 else
4576 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4577 }
4578 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4579 }
4580 }
4581}
4582
4583
4584/**
4585 * Clear references to shadowed pages in an EPT page directory pointer table.
4586 *
4587 * @param pPool The pool.
4588 * @param pPage The page.
4589 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4590 */
4591DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4592{
4593 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4594 {
4595 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4596 if (pShwPDPT->a[i].n.u1Present)
4597 {
4598 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4599 if (pSubPage)
4600 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4601 else
4602 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4603 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4604 }
4605 }
4606}
4607
4608
4609/**
4610 * Clears all references made by this page.
4611 *
4612 * This includes other shadow pages and GC physical addresses.
4613 *
4614 * @param pPool The pool.
4615 * @param pPage The page.
4616 */
4617static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4618{
4619 /*
4620 * Map the shadow page and take action according to the page kind.
4621 */
4622 PVM pVM = pPool->CTX_SUFF(pVM);
4623 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
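 /* Page table kinds also need the guest PT mapped so the guest physical
    addresses can serve as dereferencing hints; directory and pointer table
    kinds only have to release their child pool pages. */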
4624 switch (pPage->enmKind)
4625 {
4626 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4627 {
4628 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4629 void *pvGst;
4630 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4631 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4632 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4633 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4634 break;
4635 }
4636
4637 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4638 {
4639 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4640 void *pvGst;
4641 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4642 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4643 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4644 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4645 break;
4646 }
4647
4648 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4649 {
4650 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4651 void *pvGst;
4652 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4653 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4654 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4655 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4656 break;
4657 }
4658
4659 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4660 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4661 {
4662 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4663 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4664 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4665 break;
4666 }
4667
4668 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4669 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4670 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4671 {
4672 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4673 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4674 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4675 break;
4676 }
4677
4678 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4679 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4680 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4681 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4682 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4683 case PGMPOOLKIND_PAE_PD_PHYS:
4684 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4685 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4686 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4687 break;
4688
4689 case PGMPOOLKIND_32BIT_PD_PHYS:
4690 case PGMPOOLKIND_32BIT_PD:
4691 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4692 break;
4693
4694 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4695 case PGMPOOLKIND_PAE_PDPT:
4696 case PGMPOOLKIND_PAE_PDPT_PHYS:
4697 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4698 break;
4699
4700 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4701 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4702 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4703 break;
4704
4705 case PGMPOOLKIND_64BIT_PML4:
4706 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4707 break;
4708
4709 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4710 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4711 break;
4712
4713 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4714 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4715 break;
4716
4717 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4718 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4719 break;
4720
4721 default:
4722 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4723 }
4724
4725 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4726 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4727 ASMMemZeroPage(pvShw);
4728 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4729 pPage->fZeroed = true;
4730 Assert(!pPage->cPresent);
4731 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4732}
4733
4734/**
4735 * Flushes a pool page.
4736 *
4737 * This moves the page to the free list after removing all user references to it.
4738 *
4739 * @returns VBox status code.
4740 * @retval VINF_SUCCESS on success.
4741 * @param pPool The pool.
4742 * @param pPage The shadow page.
4743 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4744 */
4745int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4746{
4747 PVM pVM = pPool->CTX_SUFF(pVM);
4748 bool fFlushRequired = false;
4749
4750 int rc = VINF_SUCCESS;
4751 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4752 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4753 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4754
4755 /*
4756 * Quietly reject any attempts at flushing any of the special root pages.
4757 */
4758 if (pPage->idx < PGMPOOL_IDX_FIRST)
4759 {
4760 AssertFailed(); /* can no longer happen */
4761 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4762 return VINF_SUCCESS;
4763 }
4764
4765 pgmLock(pVM);
4766
4767 /*
4768 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4769 */
4770 if (pgmPoolIsPageLocked(pPage))
4771 {
4772 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4773 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4774 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4775 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4776 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4777 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4778 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4779 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4780 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4781 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4782 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4783 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4784 pgmUnlock(pVM);
4785 return VINF_SUCCESS;
4786 }
4787
4788#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4789 /* Start a subset so we won't run out of mapping space. */
4790 PVMCPU pVCpu = VMMGetCpu(pVM);
4791 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4792#endif
4793
4794 /*
4795 * Mark the page as being in need of an ASMMemZeroPage().
4796 */
4797 pPage->fZeroed = false;
4798
4799#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4800 if (pPage->fDirty)
4801 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4802#endif
4803
4804 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4805 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4806 fFlushRequired = true;
4807
4808 /*
4809 * Clear the page.
4810 */
4811 pgmPoolTrackClearPageUsers(pPool, pPage);
4812 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4813 pgmPoolTrackDeref(pPool, pPage);
4814 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4815
4816 /*
4817 * Flush it from the cache.
4818 */
4819 pgmPoolCacheFlushPage(pPool, pPage);
4820
4821#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4822 /* Heavy stuff done. */
4823 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4824#endif
4825
4826 /*
4827 * Deregister the monitoring.
4828 */
4829 if (pPage->fMonitored)
4830 rc = pgmPoolMonitorFlush(pPool, pPage);
4831
4832 /*
4833 * Free the page.
4834 */
4835 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4836 pPage->iNext = pPool->iFreeHead;
4837 pPool->iFreeHead = pPage->idx;
4838 pPage->enmKind = PGMPOOLKIND_FREE;
4839 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4840 pPage->GCPhys = NIL_RTGCPHYS;
4841 pPage->fReusedFlushPending = false;
4842
4843 pPool->cUsedPages--;
4844
4845 /* Flush the TLBs of all VCPUs if required. */
4846 if ( fFlushRequired
4847 && fFlush)
4848 {
4849 PGM_INVL_ALL_VCPU_TLBS(pVM);
4850 }
4851
4852 pgmUnlock(pVM);
4853 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4854 return rc;
4855}
4856
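/*
 * A minimal usage sketch (hypothetical caller; pPool and pPage are assumed to
 * be valid and the names are illustrative).  Passing true for fFlush lets the
 * routine issue the cross-VCPU TLB flush itself if the page still had users.
 */
#if 0
    int rc = pgmPoolFlushPage(pPool, pPage, true /* fFlush */);
    Assert(RT_SUCCESS(rc));
#endif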
4857
4858/**
4859 * Frees a usage of a pool page.
4860 *
4861 * The caller is responsible for updating the user table so that it no longer
4862 * references the shadow page.
4863 *
4864 * @param pPool The pool.
4865 * @param pPage The shadow page.
4866 * @param iUser The shadow page pool index of the user table.
4867 * @param iUserTable The index into the user table (shadowed).
4868 */
4869void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4870{
4871 PVM pVM = pPool->CTX_SUFF(pVM);
4872
4873 STAM_PROFILE_START(&pPool->StatFree, a);
4874 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4875 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4876 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4877 pgmLock(pVM);
4878 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4879 if (!pPage->fCached)
4880 pgmPoolFlushPage(pPool, pPage);
4881 pgmUnlock(pVM);
4882 STAM_PROFILE_STOP(&pPool->StatFree, a);
4883}
4884
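/*
 * Usage sketch (hypothetical; pShwPD, pPdPage and iPde are illustrative names
 * for a mapped shadow page directory, its pool page and the entry index).  Per
 * the contract above, the caller clears its own table entry and then drops the
 * reference.
 */
#if 0
    PPGMPOOLPAGE pSubPage = pgmPoolGetPage(pPool, pShwPD->a[iPde].u & X86_PDE_PAE_PG_MASK);
    pShwPD->a[iPde].u = 0;                  /* the user table no longer references the page */
    pgmPoolFreeByPage(pPool, pSubPage, pPdPage->idx, iPde);
#endif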
4885
4886/**
4887 * Makes sure one or more free pages are available, growing the pool or freeing a cached page as needed.
4888 *
4889 * @returns VBox status code.
4890 * @retval VINF_SUCCESS on success.
4891 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4892 *
4893 * @param pPool The pool.
4894 * @param enmKind The page table kind.
4895 * @param iUser The user of the page.
4896 */
4897static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4898{
4899 PVM pVM = pPool->CTX_SUFF(pVM);
4900
4901 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%d\n", iUser));
4902
4903 /*
4904 * If the pool isn't fully grown yet, expand it.
4905 */
4906 if ( pPool->cCurPages < pPool->cMaxPages
4907#if defined(IN_RC)
4908 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4909 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4910 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4911#endif
4912 )
4913 {
4914 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4915#ifdef IN_RING3
4916 int rc = PGMR3PoolGrow(pVM);
4917#else
4918 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4919#endif
4920 if (RT_FAILURE(rc))
4921 return rc;
4922 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4923 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4924 return VINF_SUCCESS;
4925 }
4926
4927 /*
4928 * Free one cached page.
4929 */
4930 return pgmPoolCacheFreeOne(pPool, iUser);
4931}
4932
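/*
 * Sketch of the allocation-path pattern this helper supports (hypothetical,
 * mirroring the logic in pgmPoolAllocEx below): call it only when the free
 * list is empty and re-check the free head afterwards.
 */
#if 0
    if (pPool->iFreeHead == NIL_PGMPOOL_IDX)
    {
        int rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
        if (RT_FAILURE(rc))
            return rc;
        AssertReturn(pPool->iFreeHead != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
    }
#endif
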
4933/**
4934 * Allocates a page from the pool.
4935 *
4936 * This page may actually be a cached page and not in need of any processing
4937 * on the caller's part.
4938 *
4939 * @returns VBox status code.
4940 * @retval VINF_SUCCESS if a NEW page was allocated.
4941 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4942 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4943 * @param pVM The VM handle.
4944 * @param GCPhys The GC physical address of the page we're going to shadow.
4945 * For 4MB and 2MB PD entries, it's the first address the
4946 * shadow PT is covering.
4947 * @param enmKind The kind of mapping.
4948 * @param enmAccess Access type for the mapping (only relevant for big pages)
4949 * @param iUser The shadow page pool index of the user table.
4950 * @param iUserTable The index into the user table (shadowed).
4951 * @param fLockPage Lock the page.
4952 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4953 */
4954int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable,
4955 bool fLockPage, PPPGMPOOLPAGE ppPage)
4956{
4957 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4958 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4959 LogFlow(("pgmPoolAllocEx: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4960 *ppPage = NULL;
4961 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4962 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4963 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4964
4965 pgmLock(pVM);
4966
4967 if (pPool->fCacheEnabled)
4968 {
4969 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4970 if (RT_SUCCESS(rc2))
4971 {
4972 if (fLockPage)
4973 pgmPoolLockPage(pPool, *ppPage);
4974 pgmUnlock(pVM);
4975 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4976 LogFlow(("pgmPoolAllocEx: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4977 return rc2;
4978 }
4979 }
4980
4981 /*
4982 * Allocate a new one.
4983 */
4984 int rc = VINF_SUCCESS;
4985 uint16_t iNew = pPool->iFreeHead;
4986 if (iNew == NIL_PGMPOOL_IDX)
4987 {
4988 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4989 if (RT_FAILURE(rc))
4990 {
4991 pgmUnlock(pVM);
4992 Log(("pgmPoolAllocEx: returns %Rrc (Free)\n", rc));
4993 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4994 return rc;
4995 }
4996 iNew = pPool->iFreeHead;
4997 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4998 }
4999
5000 /* unlink the free head */
5001 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5002 pPool->iFreeHead = pPage->iNext;
5003 pPage->iNext = NIL_PGMPOOL_IDX;
5004
5005 /*
5006 * Initialize it.
5007 */
5008 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5009 pPage->enmKind = enmKind;
5010 pPage->enmAccess = enmAccess;
5011 pPage->GCPhys = GCPhys;
5012 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5013 pPage->fMonitored = false;
5014 pPage->fCached = false;
5015#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5016 pPage->fDirty = false;
5017#endif
5018 pPage->fReusedFlushPending = false;
5019 pPage->cModifications = 0;
5020 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5021 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5022 pPage->cLocked = 0;
5023 pPage->cPresent = 0;
5024 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5025 pPage->pvLastAccessHandlerFault = 0;
5026 pPage->cLastAccessHandlerCount = 0;
5027 pPage->pvLastAccessHandlerRip = 0;
5028
5029 /*
5030 * Insert into the tracking and cache. If this fails, free the page.
5031 */
5032 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5033 if (RT_FAILURE(rc3))
5034 {
5035 pPool->cUsedPages--;
5036 pPage->enmKind = PGMPOOLKIND_FREE;
5037 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5038 pPage->GCPhys = NIL_RTGCPHYS;
5039 pPage->iNext = pPool->iFreeHead;
5040 pPool->iFreeHead = pPage->idx;
5041 pgmUnlock(pVM);
5042 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5043 Log(("pgmPoolAllocEx: returns %Rrc (Insert)\n", rc3));
5044 return rc3;
5045 }
5046
5047 /*
5048 * Commit the allocation, clear the page and return.
5049 */
5050#ifdef VBOX_WITH_STATISTICS
5051 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5052 pPool->cUsedPagesHigh = pPool->cUsedPages;
5053#endif
5054
5055 if (!pPage->fZeroed)
5056 {
5057 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5058 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5059 ASMMemZeroPage(pv);
5060 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5061 }
5062
5063 *ppPage = pPage;
5064 if (fLockPage)
5065 pgmPoolLockPage(pPool, pPage);
5066 pgmUnlock(pVM);
5067 LogFlow(("pgmPoolAllocEx: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5068 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5069 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5070 return rc;
5071}
5072
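/*
 * Usage sketch (hypothetical caller shadowing a guest PAE page table; GCPhysPT,
 * pShwPde and iPdPae are illustrative names).  iUser/iUserTable identify the
 * shadow PD entry that will reference the new page; VINF_PGM_CACHED_PAGE means
 * the returned page already holds valid content, VINF_SUCCESS a zeroed page.
 */
#if 0
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAllocEx(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
                            pShwPde->idx, iPdPae, false /* fLockPage */, &pShwPage);
    if (RT_FAILURE(rc))
        return rc;
#endif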
5073
5074/**
5075 * Frees a usage of a pool page.
5076 *
5077 * @param pVM The VM handle.
5078 * @param HCPhys The HC physical address of the shadow page.
5079 * @param iUser The shadow page pool index of the user table.
5080 * @param iUserTable The index into the user table (shadowed).
5081 */
5082void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5083{
5084 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5085 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5086 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5087}
5088
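/*
 * Usage sketch (hypothetical; pShwPD, pPdPage and iPd are illustrative names):
 * read the host-physical address out of the shadow PDE, clear the entry (the
 * caller owns the user table) and release the pool page it pointed to.
 */
#if 0
    RTHCPHYS const HCPhysPT = pShwPD->a[iPd].u & X86_PDE_PAE_PG_MASK;
    pShwPD->a[iPd].u = 0;
    pgmPoolFree(pVM, HCPhysPT, pPdPage->idx, iPd);
#endif
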
5089/**
5090 * Internal worker for finding an 'in-use' shadow page given its physical address.
5091 *
5092 * @returns Pointer to the shadow page structure.
5093 * @param pPool The pool.
5094 * @param HCPhys The HC physical address of the shadow page.
5095 */
5096PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5097{
5098 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5099
5100 /*
5101 * Look up the page.
5102 */
5103 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5104
5105 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5106 return pPage;
5107}
5108
5109
5110/**
5111 * Internal worker for finding a page for debugging purposes, no assertions.
5112 *
5113 * @returns Pointer to the shadow page structure, or NULL if not found.
5114 * @param pPool The pool.
5115 * @param HCPhys The HC physical address of the shadow page.
5116 */
5117PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5118{
5119 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5120 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5121}
5122
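/*
 * Lookup sketch (hypothetical; HCPhysShw is an illustrative value): the two
 * helpers above differ only in how a miss is handled; pgmPoolGetPage() asserts
 * fatally on an unknown or free page, while pgmPoolQueryPageForDbg() returns
 * NULL and is the one to use from debug/info paths.
 */
#if 0
    PPGMPOOLPAGE pShwPage = pgmPoolQueryPageForDbg(pPool, HCPhysShw);
    if (!pShwPage)
        Log(("No pool page for %RHp\n", HCPhysShw));
#endif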
5123
5124#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5125/**
5126 * Flushes the specified page if it is present.
5127 *
5128 * @param pVM The VM handle.
5129 * @param GCPhys The guest physical address of the page to flush.
5130 */
5131void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5132{
5133 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5134
5135 VM_ASSERT_EMT(pVM);
5136
5137 /*
5138 * Look up the GCPhys in the hash.
5139 */
5140 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5141 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5142 if (i == NIL_PGMPOOL_IDX)
5143 return;
5144
5145 do
5146 {
5147 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5148 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5149 {
5150 switch (pPage->enmKind)
5151 {
5152 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5153 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5154 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5155 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5156 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5157 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5158 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5159 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5160 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5161 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5162 case PGMPOOLKIND_64BIT_PML4:
5163 case PGMPOOLKIND_32BIT_PD:
5164 case PGMPOOLKIND_PAE_PDPT:
5165 {
5166 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5167#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5168 if (pPage->fDirty)
5169 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5170 else
5171#endif
5172 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5173 Assert(!pgmPoolIsPageLocked(pPage));
5174 pgmPoolMonitorChainFlush(pPool, pPage);
5175 return;
5176 }
5177
5178 /* ignore, no monitoring. */
5179 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5180 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5181 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5182 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5183 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5184 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5185 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5186 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5187 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5188 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5189 case PGMPOOLKIND_ROOT_NESTED:
5190 case PGMPOOLKIND_PAE_PD_PHYS:
5191 case PGMPOOLKIND_PAE_PDPT_PHYS:
5192 case PGMPOOLKIND_32BIT_PD_PHYS:
5193 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5194 break;
5195
5196 default:
5197 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5198 }
5199 }
5200
5201 /* next */
5202 i = pPage->iNext;
5203 } while (i != NIL_PGMPOOL_IDX);
5204 return;
5205}
5206#endif /* IN_RING3 */
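
/*
 * Usage sketch (hypothetical ring-3 caller; GCPhysPage is an illustrative
 * value): drop any monitored shadow paging structure mapping the given guest
 * page.  The address is truncated to a page boundary internally, and the
 * unmonitored (physical) page kinds are ignored, as the switch above shows.
 */
#if 0
    pgmPoolFlushPageByGCPhys(pVM, GCPhysPage);
#endif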
5207
5208#ifdef IN_RING3
5209
5210
5211/**
5212 * Reset CPU on hot plugging.
5213 *
5214 * @param pVM The VM handle.
5215 * @param pVCpu The virtual CPU.
5216 */
5217void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5218{
5219 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5220
5221 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5222 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5223 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5224}
5225
5226
5227/**
5228 * Flushes the entire cache.
5229 *
5230 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5231 * this and will execute the CR3 flush.
5232 *
5233 * @param pVM The VM handle.
5234 */
5235void pgmR3PoolReset(PVM pVM)
5236{
5237 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5238
5239 PGM_LOCK_ASSERT_OWNER(pVM);
5240 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5241 LogFlow(("pgmR3PoolReset:\n"));
5242
5243 /*
5244 * If there are no pages in the pool, there is nothing to do.
5245 */
5246 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5247 {
5248 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5249 return;
5250 }
5251
5252 /*
5253 * Exit the shadow mode since we're going to clear everything,
5254 * including the root page.
5255 */
5256 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5257 {
5258 PVMCPU pVCpu = &pVM->aCpus[i];
5259 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5260 }
5261
5262 /*
5263 * Nuke the free list and reinsert all pages into it.
5264 */
5265 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5266 {
5267 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5268
5269 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5270 if (pPage->fMonitored)
5271 pgmPoolMonitorFlush(pPool, pPage);
5272 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5273 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5274 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5275 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5276 pPage->cModifications = 0;
5277 pPage->GCPhys = NIL_RTGCPHYS;
5278 pPage->enmKind = PGMPOOLKIND_FREE;
5279 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5280 Assert(pPage->idx == i);
5281 pPage->iNext = i + 1;
5282 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5283 pPage->fSeenNonGlobal = false;
5284 pPage->fMonitored = false;
5285#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5286 pPage->fDirty = false;
5287#endif
5288 pPage->fCached = false;
5289 pPage->fReusedFlushPending = false;
5290 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5291 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5292 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5293 pPage->cLocked = 0;
5294 }
5295 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5296 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5297 pPool->cUsedPages = 0;
5298
5299 /*
5300 * Zap and reinitialize the user records.
5301 */
5302 pPool->cPresent = 0;
5303 pPool->iUserFreeHead = 0;
5304 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5305 const unsigned cMaxUsers = pPool->cMaxUsers;
5306 for (unsigned i = 0; i < cMaxUsers; i++)
5307 {
5308 paUsers[i].iNext = i + 1;
5309 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5310 paUsers[i].iUserTable = 0xfffffffe;
5311 }
5312 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5313
5314 /*
5315 * Clear all the GCPhys links and rebuild the phys ext free list.
5316 */
5317 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5318 pRam;
5319 pRam = pRam->CTX_SUFF(pNext))
5320 {
5321 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5322 while (iPage-- > 0)
5323 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5324 }
5325
5326 pPool->iPhysExtFreeHead = 0;
5327 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5328 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5329 for (unsigned i = 0; i < cMaxPhysExts; i++)
5330 {
5331 paPhysExts[i].iNext = i + 1;
5332 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5333 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5334 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5335 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5336 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5337 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5338 }
5339 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5340
5341 /*
5342 * Just zap the modified list.
5343 */
5344 pPool->cModifiedPages = 0;
5345 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5346
5347 /*
5348 * Clear the GCPhys hash and the age list.
5349 */
5350 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5351 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5352 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5353 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5354
5355#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5356 /* Clear all dirty pages. */
5357 pPool->idxFreeDirtyPage = 0;
5358 pPool->cDirtyPages = 0;
5359 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5360 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5361#endif
5362
5363 /*
5364 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5365 */
5366 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5367 {
5368 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5369 pPage->iNext = NIL_PGMPOOL_IDX;
5370 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5371 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5372 pPage->cModifications = 0;
5373 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5374 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5375 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5376 if (pPage->fMonitored)
5377 {
5378 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
5379 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5380 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5381 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5382 pPool->pszAccessHandler);
5383 AssertFatalRCSuccess(rc);
5384 pgmPoolHashInsert(pPool, pPage);
5385 }
5386 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5387 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5388 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5389 }
5390
5391 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5392 {
5393 /*
5394 * Re-enter the shadowing mode and assert Sync CR3 FF.
5395 */
5396 PVMCPU pVCpu = &pVM->aCpus[i];
5397 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5398 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5399 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5400 }
5401
5402 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5403}
5404#endif /* IN_RING3 */
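
/*
 * Post-reset invariants (hypothetical asserts derived from the assignments in
 * pgmR3PoolReset() above): the pool is empty, the free list starts at the
 * first regular index and the modified-page list is cleared.
 */
#if 0
    Assert(pPool->cUsedPages == 0);
    Assert(pPool->iFreeHead == PGMPOOL_IDX_FIRST);
    Assert(pPool->cModifiedPages == 0);
    Assert(pPool->iModifiedHead == NIL_PGMPOOL_IDX);
#endif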
5405
5406#ifdef LOG_ENABLED
5407static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5408{
5409 switch(enmKind)
5410 {
5411 case PGMPOOLKIND_INVALID:
5412 return "PGMPOOLKIND_INVALID";
5413 case PGMPOOLKIND_FREE:
5414 return "PGMPOOLKIND_FREE";
5415 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5416 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5417 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5418 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5419 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5420 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5421 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5422 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5423 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5424 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5425 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5426 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5427 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5428 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5429 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5430 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5431 case PGMPOOLKIND_32BIT_PD:
5432 return "PGMPOOLKIND_32BIT_PD";
5433 case PGMPOOLKIND_32BIT_PD_PHYS:
5434 return "PGMPOOLKIND_32BIT_PD_PHYS";
5435 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5436 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5437 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5438 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5439 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5440 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5441 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5442 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5443 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5444 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5445 case PGMPOOLKIND_PAE_PD_PHYS:
5446 return "PGMPOOLKIND_PAE_PD_PHYS";
5447 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5448 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5449 case PGMPOOLKIND_PAE_PDPT:
5450 return "PGMPOOLKIND_PAE_PDPT";
5451 case PGMPOOLKIND_PAE_PDPT_PHYS:
5452 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5453 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5454 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5455 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5456 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5457 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5458 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5459 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5460 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5461 case PGMPOOLKIND_64BIT_PML4:
5462 return "PGMPOOLKIND_64BIT_PML4";
5463 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5464 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5465 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5466 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5467 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5468 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5469 case PGMPOOLKIND_ROOT_NESTED:
5470 return "PGMPOOLKIND_ROOT_NESTED";
5471 }
5472 return "Unknown kind!";
5473}
5474#endif /* LOG_ENABLED*/