VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@41460

Last change on this file since 41460 was 41460, checked in by vboxsync, 13 years ago

PGMPool: Enabled A20 state.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 209.0 KB
1/* $Id: PGMAllPool.cpp 41460 2012-05-28 10:38:47Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hwacc_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
51static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#ifndef IN_RING3
53DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
54#endif
55#ifdef LOG_ENABLED
56static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
57#endif
58#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
59static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
60#endif
61
62int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
63PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
64void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
65void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
66
67RT_C_DECLS_END
68
69
70/**
71 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
72 *
73 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
74 * @param enmKind The page kind.
75 */
76DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
77{
78 switch (enmKind)
79 {
80 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
83 return true;
84 default:
85 return false;
86 }
87}
88
89
90/**
91 * Flushes a chain of pages sharing the same access monitor.
92 *
93 * @returns VBox status code suitable for scheduling.
94 * @param pPool The pool.
95 * @param pPage A page in the chain.
96 * @todo VBOXSTRICTRC
97 */
98int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
99{
100 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
101
102 /*
103 * Find the list head.
104 */
105 uint16_t idx = pPage->idx;
106 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
109 {
110 idx = pPage->iMonitoredPrev;
111 Assert(idx != pPage->idx);
112 pPage = &pPool->aPages[idx];
113 }
114 }
115
116 /*
117 * Iterate the list flushing each shadow page.
118 */
119 int rc = VINF_SUCCESS;
120 for (;;)
121 {
122 idx = pPage->iMonitoredNext;
123 Assert(idx != pPage->idx);
124 if (pPage->idx >= PGMPOOL_IDX_FIRST)
125 {
126 int rc2 = pgmPoolFlushPage(pPool, pPage);
127 AssertRC(rc2);
128 }
129 /* next */
130 if (idx == NIL_PGMPOOL_IDX)
131 break;
132 pPage = &pPool->aPages[idx];
133 }
134 return rc;
135}
136
137
138/**
139 * Wrapper for reading the guest entry being modified in the current context.
140 *
141 * @returns VBox status code.
142 * @param pVM The VM handle.
143 * @param pvDst Destination address.
144 * @param pvSrc Source guest virtual address.
145 * @param GCPhysSrc The source guest physical address.
146 * @param cb Size of data to read.
147 */
148DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc,
149 RTGCPHYS GCPhysSrc, size_t cb)
150{
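    /* Align the read down to the entry size (cb is a power of two: 4 or 8 bytes) so the
       whole guest entry is read even when the faulting write address is unaligned. */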
151#if defined(IN_RING3)
152 NOREF(pVM); NOREF(GCPhysSrc);
153 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
154 return VINF_SUCCESS;
155#else
156 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
157 NOREF(pvSrc);
158 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
159#endif
160}
161
162
163/**
164 * Process shadow entries before they are changed by the guest.
165 *
166 * For PT entries we will clear them. For PD entries, we'll simply check
167 * for mapping conflicts and set the SyncCR3 FF if found.
168 *
169 * @param pVCpu The VMCPU handle.
170 * @param pPool The pool.
171 * @param pPage The head page.
172 * @param GCPhysFault The guest physical fault address.
173 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
174 * In R3 this is the host context 'fault' address.
175 * @param cbWrite The write size; may be zero if the caller knows we're not crossing entry boundaries.
176 */
177void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
178 CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
179{
180 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
181 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
182 PVM pVM = pPool->CTX_SUFF(pVM);
183 NOREF(pVCpu);
184
185 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
186
187 for (;;)
188 {
189 union
190 {
191 void *pv;
192 PX86PT pPT;
193 PPGMSHWPTPAE pPTPae;
194 PX86PD pPD;
195 PX86PDPAE pPDPae;
196 PX86PDPT pPDPT;
197 PX86PML4 pPML4;
198 } uShw;
199
200 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
201
202 uShw.pv = NULL;
203 switch (pPage->enmKind)
204 {
205 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
206 {
207 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
208 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
209 const unsigned iShw = off / sizeof(X86PTE);
210 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
211 if (uShw.pPT->a[iShw].n.u1Present)
212 {
213 X86PTE GstPte;
214
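                    /* Read the guest PTE being written; its physical address is used as a hint
                       when dropping the old shadow entry's physical-page tracking reference. */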
215 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
216 AssertRC(rc);
217 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
218 pgmPoolTracDerefGCPhysHint(pPool, pPage,
219 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
220 GstPte.u & X86_PTE_PG_MASK,
221 iShw);
222 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
223 }
224 break;
225 }
226
227 /* page/2 sized */
228 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
229 {
230 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
231 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
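                /* A PAE page table (512 entries of 8 bytes) shadows only half of the 1024-entry
                   32-bit guest page table; only act if the write hits the half this shadow page
                   covers (encoded in pPage->GCPhys). */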
232 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
233 {
234 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
235 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
236 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
237 {
238 X86PTE GstPte;
239 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
240 AssertRC(rc);
241
242 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
243 pgmPoolTracDerefGCPhysHint(pPool, pPage,
244 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
245 GstPte.u & X86_PTE_PG_MASK,
246 iShw);
247 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
248 }
249 }
250 break;
251 }
252
253 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
254 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
255 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
256 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
257 {
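                /* A 32-bit guest PD (1024 entries, each covering 4MB) is shadowed by four PAE PDs
                   of 512 entries (2MB each): every guest PDE maps to a pair of PAE PDEs, so 256
                   guest entries fall into each shadow PD. iShwPdpt selects the shadow PD and
                   iShw the first entry of the pair. */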
258 unsigned iGst = off / sizeof(X86PDE);
259 unsigned iShwPdpt = iGst / 256;
260 unsigned iShw = (iGst % 256) * 2;
261 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
262
263 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
264 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
265 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
266 {
267 for (unsigned i = 0; i < 2; i++)
268 {
269# ifndef IN_RING0
270 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
271 {
272 Assert(pgmMapAreMappingsEnabled(pVM));
273 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
274 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
275 break;
276 }
277# endif /* !IN_RING0 */
278 if (uShw.pPDPae->a[iShw+i].n.u1Present)
279 {
280 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
281 pgmPoolFree(pVM,
282 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
283 pPage->idx,
284 iShw + i);
285 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
286 }
287
288 /* paranoia / a bit assumptive. */
289 if ( (off & 3)
290 && (off & 3) + cbWrite > 4)
291 {
292 const unsigned iShw2 = iShw + 2 + i;
293 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
294 {
295# ifndef IN_RING0
296 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
297 {
298 Assert(pgmMapAreMappingsEnabled(pVM));
299 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
300 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
301 break;
302 }
303# endif /* !IN_RING0 */
304 if (uShw.pPDPae->a[iShw2].n.u1Present)
305 {
306 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
307 pgmPoolFree(pVM,
308 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
309 pPage->idx,
310 iShw2);
311 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
312 }
313 }
314 }
315 }
316 }
317 break;
318 }
319
320 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
321 {
322 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
323 const unsigned iShw = off / sizeof(X86PTEPAE);
324 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
325 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
326 {
327 X86PTEPAE GstPte;
328 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
329 AssertRC(rc);
330
331 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
332 pgmPoolTracDerefGCPhysHint(pPool, pPage,
333 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
334 GstPte.u & X86_PTE_PAE_PG_MASK,
335 iShw);
336 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
337 }
338
339 /* paranoia / a bit assumptive. */
340 if ( (off & 7)
341 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
342 {
343 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
344 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
345
346 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
347 {
348 X86PTEPAE GstPte;
349# ifdef IN_RING3
350 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
351# else
352 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
353# endif
354 AssertRC(rc);
355 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
356 pgmPoolTracDerefGCPhysHint(pPool, pPage,
357 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
358 GstPte.u & X86_PTE_PAE_PG_MASK,
359 iShw2);
360 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
361 }
362 }
363 break;
364 }
365
366 case PGMPOOLKIND_32BIT_PD:
367 {
368 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
369 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
370
371 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
372 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
373# ifndef IN_RING0
374 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
375 {
376 Assert(pgmMapAreMappingsEnabled(pVM));
377 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
378 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
379 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
380 break;
381 }
382# endif /* !IN_RING0 */
383# ifndef IN_RING0
384 else
385# endif /* !IN_RING0 */
386 {
387 if (uShw.pPD->a[iShw].n.u1Present)
388 {
389 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
390 pgmPoolFree(pVM,
391 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
392 pPage->idx,
393 iShw);
394 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
395 }
396 }
397 /* paranoia / a bit assumptive. */
398 if ( (off & 3)
399 && (off & 3) + cbWrite > sizeof(X86PTE))
400 {
401 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
402 if ( iShw2 != iShw
403 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
404 {
405# ifndef IN_RING0
406 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
407 {
408 Assert(pgmMapAreMappingsEnabled(pVM));
409 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
410 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
411 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
412 break;
413 }
414# endif /* !IN_RING0 */
415 if (uShw.pPD->a[iShw2].n.u1Present)
416 {
417 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
418 pgmPoolFree(pVM,
419 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
420 pPage->idx,
421 iShw2);
422 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
423 }
424 }
425 }
426#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
427 if ( uShw.pPD->a[iShw].n.u1Present
428 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
429 {
430 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
431# ifdef IN_RC /* TLB load - we're pushing things a bit... */
432 ASMProbeReadByte(pvAddress);
433# endif
434 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
435 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
436 }
437#endif
438 break;
439 }
440
441 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
442 {
443 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
444 const unsigned iShw = off / sizeof(X86PDEPAE);
445 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
446#ifndef IN_RING0
447 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
448 {
449 Assert(pgmMapAreMappingsEnabled(pVM));
450 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
451 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
452 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
453 break;
454 }
455#endif /* !IN_RING0 */
456 /*
457 * Causes trouble when the guest uses a PDE to refer to the whole page table level
458 * structure. (Invalidate here; faults later on when it tries to change the page
459 * table entries -> recheck; probably only applies to the RC case.)
460 */
461# ifndef IN_RING0
462 else
463# endif /* !IN_RING0 */
464 {
465 if (uShw.pPDPae->a[iShw].n.u1Present)
466 {
467 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
468 pgmPoolFree(pVM,
469 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
470 pPage->idx,
471 iShw);
472 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
473 }
474 }
475 /* paranoia / a bit assumptive. */
476 if ( (off & 7)
477 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
478 {
479 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
480 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
481
482#ifndef IN_RING0
483 if ( iShw2 != iShw
484 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
485 {
486 Assert(pgmMapAreMappingsEnabled(pVM));
487 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
488 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
489 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
490 break;
491 }
492#endif /* !IN_RING0 */
493# ifndef IN_RING0
494 else
495# endif /* !IN_RING0 */
496 if (uShw.pPDPae->a[iShw2].n.u1Present)
497 {
498 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
499 pgmPoolFree(pVM,
500 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
501 pPage->idx,
502 iShw2);
503 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
504 }
505 }
506 break;
507 }
508
509 case PGMPOOLKIND_PAE_PDPT:
510 {
511 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
512 /*
513 * Hopefully this doesn't happen very often:
514 * - touching unused parts of the page
515 * - messing with the bits of pd pointers without changing the physical address
516 */
517 /* PDPT roots are not page aligned; 32 byte only! */
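            /* In PAE (non-long) mode the CR3-referenced PDPT holds only four 8-byte entries and
               merely needs 32-byte alignment, so the offset is taken relative to the monitored
               GCPhys rather than the page offset. */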
518 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
519
520 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
521 const unsigned iShw = offPdpt / sizeof(X86PDPE);
522 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
523 {
524# ifndef IN_RING0
525 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
526 {
527 Assert(pgmMapAreMappingsEnabled(pVM));
528 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
529 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
530 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
531 break;
532 }
533# endif /* !IN_RING0 */
534# ifndef IN_RING0
535 else
536# endif /* !IN_RING0 */
537 if (uShw.pPDPT->a[iShw].n.u1Present)
538 {
539 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
540 pgmPoolFree(pVM,
541 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
542 pPage->idx,
543 iShw);
544 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
545 }
546
547 /* paranoia / a bit assumptive. */
548 if ( (offPdpt & 7)
549 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
550 {
551 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
552 if ( iShw2 != iShw
553 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
554 {
555# ifndef IN_RING0
556 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
557 {
558 Assert(pgmMapAreMappingsEnabled(pVM));
559 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
560 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
561 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
562 break;
563 }
564# endif /* !IN_RING0 */
565# ifndef IN_RING0
566 else
567# endif /* !IN_RING0 */
568 if (uShw.pPDPT->a[iShw2].n.u1Present)
569 {
570 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
571 pgmPoolFree(pVM,
572 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
573 pPage->idx,
574 iShw2);
575 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
576 }
577 }
578 }
579 }
580 break;
581 }
582
583#ifndef IN_RC
584 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
585 {
586 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
587 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
588 const unsigned iShw = off / sizeof(X86PDEPAE);
589 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
590 if (uShw.pPDPae->a[iShw].n.u1Present)
591 {
592 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
593 pgmPoolFree(pVM,
594 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
595 pPage->idx,
596 iShw);
597 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
598 }
599 /* paranoia / a bit assumptive. */
600 if ( (off & 7)
601 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
602 {
603 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
604 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
605
606 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
607 if (uShw.pPDPae->a[iShw2].n.u1Present)
608 {
609 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
610 pgmPoolFree(pVM,
611 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
612 pPage->idx,
613 iShw2);
614 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
615 }
616 }
617 break;
618 }
619
620 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
621 {
622 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
623 /*
624 * Hopefully this doesn't happen very often:
625 * - messing with the bits of pd pointers without changing the physical address
626 */
627 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
628 const unsigned iShw = off / sizeof(X86PDPE);
629 if (uShw.pPDPT->a[iShw].n.u1Present)
630 {
631 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
632 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
633 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
634 }
635 /* paranoia / a bit assumptive. */
636 if ( (off & 7)
637 && (off & 7) + cbWrite > sizeof(X86PDPE))
638 {
639 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
640 if (uShw.pPDPT->a[iShw2].n.u1Present)
641 {
642 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
643 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
644 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
645 }
646 }
647 break;
648 }
649
650 case PGMPOOLKIND_64BIT_PML4:
651 {
652 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
653 /*
654 * Hopefully this doesn't happen very often:
655 * - messing with the bits of pd pointers without changing the physical address
656 */
657 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
658 const unsigned iShw = off / sizeof(X86PDPE);
659 if (uShw.pPML4->a[iShw].n.u1Present)
660 {
661 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
662 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
663 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
664 }
665 /* paranoia / a bit assumptive. */
666 if ( (off & 7)
667 && (off & 7) + cbWrite > sizeof(X86PDPE))
668 {
669 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
670 if (uShw.pPML4->a[iShw2].n.u1Present)
671 {
672 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
673 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
674 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
675 }
676 }
677 break;
678 }
679#endif /* !IN_RC */
680
681 default:
682 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
683 }
684 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
685
686 /* next */
687 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
688 return;
689 pPage = &pPool->aPages[pPage->iMonitoredNext];
690 }
691}
692
693# ifndef IN_RING3
694
695/**
696 * Checks if an access could be a fork operation in progress.
697 *
698 * Meaning that the guest is setting up the parent process for Copy-On-Write.
699 *
700 * @returns true if it's likely that we're forking, otherwise false.
701 * @param pPool The pool.
702 * @param pDis The disassembled instruction.
703 * @param offFault The access offset.
704 */
705DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
706{
707 /*
708 * i386 linux is using btr to clear X86_PTE_RW.
709 * The functions involved are (2.6.16 source inspection):
710 * clear_bit
711 * ptep_set_wrprotect
712 * copy_one_pte
713 * copy_pte_range
714 * copy_pmd_range
715 * copy_pud_range
716 * copy_page_range
717 * dup_mmap
718 * dup_mm
719 * copy_mm
720 * copy_process
721 * do_fork
722 */
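    /* The !(offFault & 4) check presumably ensures the write targets the low dword of the
       (8-byte PAE) PTE, which is where the R/W bit lives. */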
723 if ( pDis->pCurInstr->opcode == OP_BTR
724 && !(offFault & 4)
725 /** @todo Validate that the bit index is X86_PTE_RW. */
726 )
727 {
728 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
729 return true;
730 }
731 return false;
732}
733
734
735/**
736 * Determine whether the page is likely to have been reused.
737 *
738 * @returns true if we consider the page as being reused for a different purpose.
739 * @returns false if we consider it to still be a paging page.
740 * @param pVM The VM handle.
741 * @param pVCpu VMCPU Handle.
742 * @param pRegFrame Trap register frame.
743 * @param pDis The disassembly info for the faulting instruction.
744 * @param pvFault The fault address.
745 *
746 * @remark The REP prefix check is left to the caller because of STOSD/W.
747 */
748DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
749{
750#ifndef IN_RC
751 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
752 if ( HWACCMHasPendingIrq(pVM)
753 && (pRegFrame->rsp - pvFault) < 32)
754 {
755 /* Fault caused by stack writes while trying to inject an interrupt event. */
756 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
757 return true;
758 }
759#else
760 NOREF(pVM); NOREF(pvFault);
761#endif
762
763 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
764
765 /* Non-supervisor mode write means it's used for something else. */
766 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
767 return true;
768
769 switch (pDis->pCurInstr->opcode)
770 {
771 /* call implies the actual push of the return address faulted */
772 case OP_CALL:
773 Log4(("pgmPoolMonitorIsReused: CALL\n"));
774 return true;
775 case OP_PUSH:
776 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
777 return true;
778 case OP_PUSHF:
779 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
780 return true;
781 case OP_PUSHA:
782 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
783 return true;
784 case OP_FXSAVE:
785 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
786 return true;
787 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
788 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
789 return true;
790 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
791 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
792 return true;
793 case OP_MOVSWD:
794 case OP_STOSWD:
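            /* A long REP STOSQ run (0x40+ iterations) most likely means the guest is clearing
               the page for reuse rather than updating individual paging entries. */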
795 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
796 && pRegFrame->rcx >= 0x40
797 )
798 {
799 Assert(pDis->mode == CPUMODE_64BIT);
800
801 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
802 return true;
803 }
804 return false;
805 }
806 if ( ( (pDis->param1.flags & USE_REG_GEN32)
807 || (pDis->param1.flags & USE_REG_GEN64))
808 && (pDis->param1.base.reg_gen == USE_REG_ESP))
809 {
810 Log4(("pgmPoolMonitorIsReused: ESP\n"));
811 return true;
812 }
813
814 return false;
815}
816
817
818/**
819 * Flushes the page being accessed.
820 *
821 * @returns VBox status code suitable for scheduling.
822 * @param pVM The VM handle.
823 * @param pVCpu The VMCPU handle.
824 * @param pPool The pool.
825 * @param pPage The pool page (head).
826 * @param pDis The disassembly of the write instruction.
827 * @param pRegFrame The trap register frame.
828 * @param GCPhysFault The fault address as guest physical address.
829 * @param pvFault The fault address.
830 * @todo VBOXSTRICTRC
831 */
832static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
833 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
834{
835 NOREF(GCPhysFault);
836
837 /*
838 * First, do the flushing.
839 */
840 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
841
842 /*
843 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
844 * Must do this in raw mode (!); XP boot will fail otherwise.
845 */
846 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
847 if (RT_SUCCESS(rc2))
848 AssertMsg(rc2 == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
849 else if (rc2 == VERR_EM_INTERPRETER)
850 {
851#ifdef IN_RC
852 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
853 {
854 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
855 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
856 rc = VINF_SUCCESS;
857 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
858 }
859 else
860#endif
861 {
862 rc = VINF_EM_RAW_EMULATE_INSTR;
863 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
864 }
865 }
866 else
867 rc = VBOXSTRICTRC_VAL(rc2);
868
869 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
870 return rc;
871}
872
873
874/**
875 * Handles the STOSD write accesses.
876 *
877 * @returns VBox status code suitable for scheduling.
878 * @param pVM The VM handle.
879 * @param pPool The pool.
880 * @param pPage The pool page (head).
881 * @param pDis The disassembly of the write instruction.
882 * @param pRegFrame The trap register frame.
883 * @param GCPhysFault The fault address as guest physical address.
884 * @param pvFault The fault address.
885 */
886DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
887 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
888{
889 unsigned uIncrement = pDis->param1.size;
890 NOREF(pVM);
891
892 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
893 Assert(pRegFrame->rcx <= 0x20);
894
895#ifdef VBOX_STRICT
896 if (pDis->opmode == CPUMODE_32BIT)
897 Assert(uIncrement == 4);
898 else
899 Assert(uIncrement == 8);
900#endif
901
902 Log3(("pgmPoolAccessHandlerSTOSD\n"));
903
904 /*
905 * Increment the modification counter and insert it into the list
906 * of modified pages the first time.
907 */
908 if (!pPage->cModifications++)
909 pgmPoolMonitorModifiedInsert(pPool, pPage);
910
911 /*
912 * Execute REP STOSD.
913 *
914 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
915 * write situation, meaning that it's safe to write here.
916 */
917 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
918 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
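    /* Execute the REP STOS manually: for each element, first let the monitor chain see the
       change (so affected shadow entries are cleared), then perform the guest write and
       advance the registers exactly as the CPU would have. */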
919 while (pRegFrame->rcx)
920 {
921#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
922 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
923 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
924 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
925#else
926 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
927#endif
928#ifdef IN_RC
929 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
930#else
931 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
932#endif
933 pu32 += uIncrement;
934 GCPhysFault += uIncrement;
935 pRegFrame->rdi += uIncrement;
936 pRegFrame->rcx--;
937 }
938 pRegFrame->rip += pDis->opsize;
939
940 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
941 return VINF_SUCCESS;
942}
943
944
945/**
946 * Handles the simple write accesses.
947 *
948 * @returns VBox status code suitable for scheduling.
949 * @param pVM The VM handle.
950 * @param pVCpu The VMCPU handle.
951 * @param pPool The pool.
952 * @param pPage The pool page (head).
953 * @param pDis The disassembly of the write instruction.
954 * @param pRegFrame The trap register frame.
955 * @param GCPhysFault The fault address as guest physical address.
956 * @param pvFault The fault address.
957 * @param pfReused Reused state (in/out)
958 */
959DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
960 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
961{
962 Log3(("pgmPoolAccessHandlerSimple\n"));
963 NOREF(pfReused); /* initialized by caller */
964
965 /*
966 * Increment the modification counter and insert it into the list
967 * of modified pages the first time.
968 */
969 if (!pPage->cModifications++)
970 pgmPoolMonitorModifiedInsert(pPool, pPage);
971
972 /*
973 * Clear the affected shadow entries in the monitored chain. ASSUMES that pvFault is readable.
974 */
975#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
976 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
977 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
978 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
979#else
980 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
981#endif
982
983 /*
984 * Interpret the instruction.
985 */
986 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
987 if (RT_SUCCESS(rc))
988 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
989 else if (rc == VERR_EM_INTERPRETER)
990 {
991 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
992 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
993 rc = VINF_EM_RAW_EMULATE_INSTR;
994 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
995 }
996
997#if 0 /* experimental code */
998 if (rc == VINF_SUCCESS)
999 {
1000 switch (pPage->enmKind)
1001 {
1002 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1003 {
1004 X86PTEPAE GstPte;
1005 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1006 AssertRC(rc);
1007
1008 /* Check the new value written by the guest. If present and with a bogus physical address, then
1009 * it's fairly safe to assume the guest is reusing the PT.
1010 */
1011 if (GstPte.n.u1Present)
1012 {
1013 RTHCPHYS HCPhys = -1;
1014 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1015 if (rc != VINF_SUCCESS)
1016 {
1017 *pfReused = true;
1018 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1019 }
1020 }
1021 break;
1022 }
1023 }
1024 }
1025#endif
1026
1027 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1028 return VBOXSTRICTRC_VAL(rc);
1029}
1030
1031
1032/**
1033 * \#PF Handler callback for PT write accesses.
1034 *
1035 * @returns VBox status code (appropriate for GC return).
1036 * @param pVM The VM handle.
1037 * @param uErrorCode CPU Error code.
1038 * @param pRegFrame Trap register frame.
1039 * NULL on DMA and other non CPU access.
1040 * @param pvFault The fault address (cr2).
1041 * @param GCPhysFault The GC physical address corresponding to pvFault.
1042 * @param pvUser User argument.
1043 */
1044DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault,
1045 RTGCPHYS GCPhysFault, void *pvUser)
1046{
1047 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1048 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1049 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1050 PVMCPU pVCpu = VMMGetCpu(pVM);
1051 unsigned cMaxModifications;
1052 bool fForcedFlush = false;
1053 NOREF(uErrorCode);
1054
1055 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1056
1057 pgmLock(pVM);
1058 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1059 {
1060 /* Pool page changed while we were waiting for the lock; ignore. */
1061 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1062 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1063 pgmUnlock(pVM);
1064 return VINF_SUCCESS;
1065 }
1066#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1067 if (pPage->fDirty)
1068 {
1069 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1070 pgmUnlock(pVM);
1071 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1072 }
1073#endif
1074
1075#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1076 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1077 {
1078 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1079 void *pvGst;
1080 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1081 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1082 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1083 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1084 }
1085#endif
1086
1087 /*
1088 * Disassemble the faulting instruction.
1089 */
1090 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1091 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1092 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1093 {
1094 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1095 pgmUnlock(pVM);
1096 return rc;
1097 }
1098
1099 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1100
1101 /*
1102 * We should ALWAYS have the list head as user parameter. This
1103 * is because we use that page to record the changes.
1104 */
1105 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1106
1107#ifdef IN_RING0
1108 /* Maximum nr of modifications depends on the page type. */
1109 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1110 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1111 cMaxModifications = 4;
1112 else
1113 cMaxModifications = 24;
1114#else
1115 cMaxModifications = 48;
1116#endif
1117
1118 /*
1119 * Incremental page table updates should weigh more than random ones.
1120 * (Only applies when started from offset 0)
1121 */
1122 pVCpu->pgm.s.cPoolAccessHandler++;
1123 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1124 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1125 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->param1.size)
1126 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1127 {
1128 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1129 Assert(pPage->cModifications < 32000);
1130 pPage->cModifications = pPage->cModifications * 2;
1131 pPage->GCPtrLastAccessHandlerFault = pvFault;
1132 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1133 if (pPage->cModifications >= cMaxModifications)
1134 {
1135 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1136 fForcedFlush = true;
1137 }
1138 }
1139
1140 if (pPage->cModifications >= cMaxModifications)
1141 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1142
1143 /*
1144 * Check if it's worth dealing with.
1145 */
1146 bool fReused = false;
1147 bool fNotReusedNotForking = false;
1148 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1149 || pgmPoolIsPageLocked(pPage)
1150 )
1151 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1152 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1153 {
1154 /*
1155 * Simple instructions, no REP prefix.
1156 */
1157 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1158 {
1159 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1160 if (fReused)
1161 goto flushPage;
1162
1163 /* A mov instruction to change the first page table entry will be remembered so we can detect
1164 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1165 */
1166 if ( rc == VINF_SUCCESS
1167 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1168 && pDis->pCurInstr->opcode == OP_MOV
1169 && (pvFault & PAGE_OFFSET_MASK) == 0)
1170 {
1171 pPage->GCPtrLastAccessHandlerFault = pvFault;
1172 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1173 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1174 /* Make sure we don't kick out a page too quickly. */
1175 if (pPage->cModifications > 8)
1176 pPage->cModifications = 2;
1177 }
1178 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1179 {
1180 /* ignore the 2nd write to this page table entry. */
1181 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1182 }
1183 else
1184 {
1185 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1186 pPage->GCPtrLastAccessHandlerRip = 0;
1187 }
1188
1189 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1190 pgmUnlock(pVM);
1191 return rc;
1192 }
1193
1194 /*
1195 * Windows is frequently doing small memset() operations (netio test 4k+).
1196 * We have to deal with these or we'll kill the cache and performance.
1197 */
1198 if ( pDis->pCurInstr->opcode == OP_STOSWD
1199 && !pRegFrame->eflags.Bits.u1DF
1200 && pDis->opmode == pDis->mode
1201 && pDis->addrmode == pDis->mode)
1202 {
1203 bool fValidStosd = false;
1204
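            /* Only take the fast path for short, page-local, naturally aligned REP STOS runs
               using the fill values observed in practice. */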
1205 if ( pDis->mode == CPUMODE_32BIT
1206 && pDis->prefix == PREFIX_REP
1207 && pRegFrame->ecx <= 0x20
1208 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1209 && !((uintptr_t)pvFault & 3)
1210 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1211 )
1212 {
1213 fValidStosd = true;
1214 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1215 }
1216 else
1217 if ( pDis->mode == CPUMODE_64BIT
1218 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1219 && pRegFrame->rcx <= 0x20
1220 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1221 && !((uintptr_t)pvFault & 7)
1222 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1223 )
1224 {
1225 fValidStosd = true;
1226 }
1227
1228 if (fValidStosd)
1229 {
1230 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1231 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1232 pgmUnlock(pVM);
1233 return rc;
1234 }
1235 }
1236
1237 /* REP prefix, don't bother. */
1238 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1239 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1240 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1241 fNotReusedNotForking = true;
1242 }
1243
1244#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1245 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
 1246 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1247 */
1248 if ( pPage->cModifications >= cMaxModifications
1249 && !fForcedFlush
1250 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1251 && ( fNotReusedNotForking
1252 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1253 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1254 )
1255 )
1256 {
1257 Assert(!pgmPoolIsPageLocked(pPage));
1258 Assert(pPage->fDirty == false);
1259
1260 /* Flush any monitored duplicates as we will disable write protection. */
1261 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1262 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1263 {
1264 PPGMPOOLPAGE pPageHead = pPage;
1265
1266 /* Find the monitor head. */
1267 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1268 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1269
1270 while (pPageHead)
1271 {
1272 unsigned idxNext = pPageHead->iMonitoredNext;
1273
1274 if (pPageHead != pPage)
1275 {
1276 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1277 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1278 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1279 AssertRC(rc2);
1280 }
1281
1282 if (idxNext == NIL_PGMPOOL_IDX)
1283 break;
1284
1285 pPageHead = &pPool->aPages[idxNext];
1286 }
1287 }
1288
1289 /* The flushing above might fail for locked pages, so double check. */
1290 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1291 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1292 {
1293 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1294
1295 /* Temporarily allow write access to the page table again. */
1296 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1297 if (rc == VINF_SUCCESS)
1298 {
1299 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1300 AssertMsg(rc == VINF_SUCCESS
1301 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1302 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1303 || rc == VERR_PAGE_NOT_PRESENT,
1304 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1305# ifdef VBOX_STRICT
1306 pPage->GCPtrDirtyFault = pvFault;
1307# endif
1308
1309 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1310 pgmUnlock(pVM);
1311 return rc;
1312 }
1313 }
1314 }
1315#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1316
1317 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1318flushPage:
1319 /*
1320 * Not worth it, so flush it.
1321 *
1322 * If we considered it to be reused, don't go back to ring-3
1323 * to emulate failed instructions since we usually cannot
 1324 * interpret them. This may be a bit risky, in which case
1325 * the reuse detection must be fixed.
1326 */
1327 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1328 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1329 && fReused)
1330 {
1331 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1332 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1333 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1334 }
1335 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1336 pgmUnlock(pVM);
1337 return rc;
1338}
1339
1340# endif /* !IN_RING3 */
1341
1342# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1343
1344# if defined(VBOX_STRICT) && !defined(IN_RING3)
1345
1346/**
1347 * Check references to guest physical memory in a PAE / PAE page table.
1348 *
1349 * @param pPool The pool.
1350 * @param pPage The page.
1351 * @param pShwPT The shadow page table (mapping of the page).
1352 * @param pGstPT The guest page table.
1353 */
1354static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1355{
1356 unsigned cErrors = 0;
1357 int LastRc = -1; /* initialized to shut up gcc */
1358 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1359 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1360 PVM pVM = pPool->CTX_SUFF(pVM);
1361
1362#ifdef VBOX_STRICT
1363 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1364 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1365#endif
1366 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1367 {
1368 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1369 {
1370 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1371 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1372 if ( rc != VINF_SUCCESS
1373 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1374 {
1375 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1376 LastPTE = i;
1377 LastRc = rc;
1378 LastHCPhys = HCPhys;
1379 cErrors++;
1380
1381 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1382 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1383 AssertRC(rc);
1384
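                /* Diagnostics: scan all shadow PAE page tables for writable mappings of this
                   page table's own physical page, i.e. aliases through which it could have been
                   modified behind our back. */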
1385 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1386 {
1387 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1388
1389 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1390 {
1391 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1392
1393 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1394 {
1395 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1396 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1397 {
1398 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1399 }
1400 }
1401
1402 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1403 }
1404 }
1405 }
1406 }
1407 }
1408 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1409}
1410
1411
1412/**
1413 * Check references to guest physical memory in a PAE / 32-bit page table.
1414 *
1415 * @param pPool The pool.
1416 * @param pPage The page.
1417 * @param pShwPT The shadow page table (mapping of the page).
1418 * @param pGstPT The guest page table.
1419 */
1420static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1421{
1422 unsigned cErrors = 0;
1423 int LastRc = -1; /* initialized to shut up gcc */
1424 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1425 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1426 PVM pVM = pPool->CTX_SUFF(pVM);
1427
1428#ifdef VBOX_STRICT
1429 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1430 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1431#endif
1432 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1433 {
1434 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1435 {
1436 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1437 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1438 if ( rc != VINF_SUCCESS
1439 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1440 {
1441 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1442 LastPTE = i;
1443 LastRc = rc;
1444 LastHCPhys = HCPhys;
1445 cErrors++;
1446
1447 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1448 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1449 AssertRC(rc);
1450
1451 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1452 {
1453 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1454
1455 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1456 {
1457 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1458
1459 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1460 {
1461 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1462 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1463 {
1464 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1465 }
1466 }
1467
1468 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1469 }
1470 }
1471 }
1472 }
1473 }
1474 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1475}
1476
1477# endif /* VBOX_STRICT && !IN_RING3 */
1478
1479/**
1480 * Clear references to guest physical memory in a PAE / PAE page table.
1481 *
1482 * @returns nr of changed PTEs
1483 * @param pPool The pool.
1484 * @param pPage The page.
1485 * @param pShwPT The shadow page table (mapping of the page).
1486 * @param pGstPT The guest page table.
1487 * @param pOldGstPT The old cached guest page table.
1488 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1489 * @param pfFlush Flush reused page table (out)
1490 */
1491DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1492 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1493{
1494 unsigned cChanged = 0;
1495
1496#ifdef VBOX_STRICT
1497 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1498 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1499#endif
1500 *pfFlush = false;
1501
1502 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1503 {
1504 /* Check the new value written by the guest. If present and with a bogus physical address, then
1505 * it's fairly safe to assume the guest is reusing the PT.
1506 */
1507 if ( fAllowRemoval
1508 && pGstPT->a[i].n.u1Present)
1509 {
1510 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1511 {
1512 *pfFlush = true;
1513 return ++cChanged;
1514 }
1515 }
1516 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1517 {
1518 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1519 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1520 {
1521#ifdef VBOX_STRICT
1522 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1523 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1524 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1525#endif
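                /* Same physical address: compare attributes. The shadow entry may legitimately be
                   read-only while the guest entry is writable (e.g. dirty-bit / write-access
                   monitoring), hence fHostRW <= fGuestRW is tolerated below. */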
1526 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1527 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1528 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1529 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1530
1531 if ( uHostAttr == uGuestAttr
1532 && fHostRW <= fGuestRW)
1533 continue;
1534 }
1535 cChanged++;
1536 /* Something was changed, so flush it. */
1537 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1538 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1539 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1540 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1541 }
1542 }
1543 return cChanged;
1544}
1545
1546
1547/**
 1548 * Clear references to guest physical memory in a PAE / 32-bit page table.
1549 *
1550 * @returns nr of changed PTEs
1551 * @param pPool The pool.
1552 * @param pPage The page.
1553 * @param pShwPT The shadow page table (mapping of the page).
1554 * @param pGstPT The guest page table.
1555 * @param pOldGstPT The old cached guest page table.
1556 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1557 * @param pfFlush Flush reused page table (out)
1558 */
1559DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1560 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1561{
1562 unsigned cChanged = 0;
1563
1564#ifdef VBOX_STRICT
1565 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1566 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1567#endif
1568 *pfFlush = false;
1569
1570 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1571 {
1572 /* Check the new value written by the guest. If present and with a bogus physical address, then
1573 * it's fairly safe to assume the guest is reusing the PT.
1574 */
1575 if ( fAllowRemoval
1576 && pGstPT->a[i].n.u1Present)
1577 {
1578 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1579 {
1580 *pfFlush = true;
1581 return ++cChanged;
1582 }
1583 }
1584 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1585 {
1586 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1587 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1588 {
1589#ifdef VBOX_STRICT
1590 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1591 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1592 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1593#endif
1594 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1595 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1596 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1597 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1598
1599 if ( uHostAttr == uGuestAttr
1600 && fHostRW <= fGuestRW)
1601 continue;
1602 }
1603 cChanged++;
1604 /* Something was changed, so flush it. */
1605            Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1606 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1607 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1608 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1609 }
1610 }
1611 return cChanged;
1612}
1613
1614
1615/**
1616 * Flush a dirty page
1617 *
1618 * @param pVM The VM handle.
1619 * @param pPool The pool.
1620 * @param idxSlot Dirty array slot index
1621 * @param fAllowRemoval Allow a reused page table to be removed
1622 */
1623static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1624{
1625 PPGMPOOLPAGE pPage;
1626 unsigned idxPage;
1627
1628 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1629 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1630 return;
1631
1632 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1633 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1634 pPage = &pPool->aPages[idxPage];
1635 Assert(pPage->idx == idxPage);
1636 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1637
1638 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1639 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1640
1641#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1642 PVMCPU pVCpu = VMMGetCpu(pVM);
1643 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1644#endif
1645
1646 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1647 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1648 Assert(rc == VINF_SUCCESS);
1649 pPage->fDirty = false;
1650
1651#ifdef VBOX_STRICT
1652 uint64_t fFlags = 0;
1653 RTHCPHYS HCPhys;
1654 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1655 AssertMsg( ( rc == VINF_SUCCESS
1656 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1657 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1658 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1659 || rc == VERR_PAGE_NOT_PRESENT,
1660 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1661#endif
1662
1663 /* Flush those PTEs that have changed. */
1664 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1665 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1666 void *pvGst;
1667 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1668 bool fFlush;
1669 unsigned cChanges;
1670
1671 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1672 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1673 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1674 else
1675 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1676 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1677
1678 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1679 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1680 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1681 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1682
1683 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1684 Assert(pPage->cModifications);
1685 if (cChanges < 4)
1686 pPage->cModifications = 1; /* must use > 0 here */
1687 else
1688 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1689
1690 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1691 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1692 pPool->idxFreeDirtyPage = idxSlot;
1693
1694 pPool->cDirtyPages--;
1695 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1696 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1697 if (fFlush)
1698 {
1699 Assert(fAllowRemoval);
1700 Log(("Flush reused page table!\n"));
1701 pgmPoolFlushPage(pPool, pPage);
1702 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1703 }
1704 else
1705 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1706
1707#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1708 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1709#endif
1710}
1711
1712
1713# ifndef IN_RING3
1714/**
1715 * Add a new dirty page
1716 *
1717 * @param pVM The VM handle.
1718 * @param pPool The pool.
1719 * @param pPage The page.
1720 */
1721void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1722{
1723 unsigned idxFree;
1724
1725 PGM_LOCK_ASSERT_OWNER(pVM);
1726 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1727 Assert(!pPage->fDirty);
1728
1729 idxFree = pPool->idxFreeDirtyPage;
1730 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1731 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1732
1733 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1734 {
1735 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1736 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1737 }
1738 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1739 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1740
1741 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1742
1743 /*
1744 * Make a copy of the guest page table as we require valid GCPhys addresses
1745 * when removing references to physical pages.
1746 * (The HCPhys linear lookup is *extremely* expensive!)
1747 */
1748 void *pvGst;
1749 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1750 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1751# ifdef VBOX_STRICT
1752 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1753 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1754 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1755 else
1756 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1757 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1758# endif
1759 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1760
1761 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1762 pPage->fDirty = true;
1763 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1764 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1765 pPool->cDirtyPages++;
1766
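        /* Advance the free-slot hint; the dirty array size is a power of two, so the index simply
         * wraps. If the hinted slot turns out to be occupied, the search below locates a free one. */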
1767 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1768 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1769 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1770 {
1771 unsigned i;
1772 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1773 {
1774 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1775 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1776 {
1777 pPool->idxFreeDirtyPage = idxFree;
1778 break;
1779 }
1780 }
1781 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1782 }
1783
1784 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1785 return;
1786}
1787# endif /* !IN_RING3 */
1788
1789
1790/**
1791 * Check if the specified page is dirty (not write monitored)
1792 *
1793 * @returns true if the page is dirty, false if it is still write monitored.
1794 * @param pVM The VM handle.
1795 * @param GCPhys Guest physical address
1796 */
1797bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1798{
1799 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1800 PGM_LOCK_ASSERT_OWNER(pVM);
1801 if (!pPool->cDirtyPages)
1802 return false;
1803
1804 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1805
1806 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1807 {
1808 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1809 {
1810 PPGMPOOLPAGE pPage;
1811 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1812
1813 pPage = &pPool->aPages[idxPage];
1814 if (pPage->GCPhys == GCPhys)
1815 return true;
1816 }
1817 }
1818 return false;
1819}
1820
1821
1822/**
1823 * Reset all dirty pages by reinstating page monitoring.
1824 *
1825 * @param pVM The VM handle.
1826 */
1827void pgmPoolResetDirtyPages(PVM pVM)
1828{
1829 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1830 PGM_LOCK_ASSERT_OWNER(pVM);
1831 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1832
1833 if (!pPool->cDirtyPages)
1834 return;
1835
1836 Log(("pgmPoolResetDirtyPages\n"));
1837 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1838 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1839
1840 pPool->idxFreeDirtyPage = 0;
1841 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1842 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1843 {
1844 unsigned i;
1845 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1846 {
1847 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1848 {
1849 pPool->idxFreeDirtyPage = i;
1850 break;
1851 }
1852 }
1853 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1854 }
1855
1856 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1857 return;
1858}
1859
1860
1861/**
1862 * Invalidate the PT entry for the specified page
1863 *
1864 * @param pVM The VM handle.
1865 * @param GCPtrPage Guest page to invalidate
1866 */
1867void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1868{
1869 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1870 PGM_LOCK_ASSERT_OWNER(pVM);
1871 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1872
1873 if (!pPool->cDirtyPages)
1874 return;
1875
1876 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
1877 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1878 {
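            /* Note: the loop body is currently empty - this function is effectively a stub and
             * leaves the dirty page entries untouched. */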
1879 }
1880}
1881
1882
1883/**
1884 * Flush the dirty state of the page table at the given physical address, reinstating page monitoring.
1885 *
1886 * @param pVM The VM handle.
1887 * @param GCPhysPT Physical address of the page table
1888 */
1889void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1890{
1891 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1892 PGM_LOCK_ASSERT_OWNER(pVM);
1893 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1894 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
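        /* RT_ELEMENTS(pPool->aDirtyPages) doubles as the 'not found' sentinel for the search below. */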
1895
1896 if (!pPool->cDirtyPages)
1897 return;
1898
1899 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1900
1901 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1902 {
1903 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1904 {
1905 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1906
1907 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1908 if (pPage->GCPhys == GCPhysPT)
1909 {
1910 idxDirtyPage = i;
1911 break;
1912 }
1913 }
1914 }
1915
1916 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1917 {
1918 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1919 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1920 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1921 {
1922 unsigned i;
1923 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1924 {
1925 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1926 {
1927 pPool->idxFreeDirtyPage = i;
1928 break;
1929 }
1930 }
1931 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1932 }
1933 }
1934}
1935
1936# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1937
1938/**
1939 * Inserts a page into the GCPhys hash table.
1940 *
1941 * @param pPool The pool.
1942 * @param pPage The page.
1943 */
1944DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1945{
1946 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1947 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1948 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1949 pPage->iNext = pPool->aiHash[iHash];
1950 pPool->aiHash[iHash] = pPage->idx;
1951}
1952
1953
1954/**
1955 * Removes a page from the GCPhys hash table.
1956 *
1957 * @param pPool The pool.
1958 * @param pPage The page.
1959 */
1960DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1961{
1962 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1963 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1964 if (pPool->aiHash[iHash] == pPage->idx)
1965 pPool->aiHash[iHash] = pPage->iNext;
1966 else
1967 {
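            /* The page is not the chain head: walk the hash chain to find its predecessor and unlink it. */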
1968 uint16_t iPrev = pPool->aiHash[iHash];
1969 for (;;)
1970 {
1971 const int16_t i = pPool->aPages[iPrev].iNext;
1972 if (i == pPage->idx)
1973 {
1974 pPool->aPages[iPrev].iNext = pPage->iNext;
1975 break;
1976 }
1977 if (i == NIL_PGMPOOL_IDX)
1978 {
1979 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1980 break;
1981 }
1982 iPrev = i;
1983 }
1984 }
1985 pPage->iNext = NIL_PGMPOOL_IDX;
1986}
1987
1988
1989/**
1990 * Frees up one cache page.
1991 *
1992 * @returns VBox status code.
1993 * @retval VINF_SUCCESS on success.
1994 * @param pPool The pool.
1995 * @param iUser The user index.
1996 */
1997static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1998{
1999#ifndef IN_RC
2000 const PVM pVM = pPool->CTX_SUFF(pVM);
2001#endif
2002    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
2003 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2004
2005 /*
2006 * Select one page from the tail of the age list.
2007 */
2008 PPGMPOOLPAGE pPage;
2009 for (unsigned iLoop = 0; ; iLoop++)
2010 {
2011 uint16_t iToFree = pPool->iAgeTail;
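            /* Never free the caller's own page (iUser); fall back to the previous entry in the age list. */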
2012 if (iToFree == iUser)
2013 iToFree = pPool->aPages[iToFree].iAgePrev;
2014/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2015 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2016 {
2017 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2018 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2019 {
2020 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2021 continue;
2022 iToFree = i;
2023 break;
2024 }
2025 }
2026*/
2027 Assert(iToFree != iUser);
2028 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2029 pPage = &pPool->aPages[iToFree];
2030
2031 /*
2032 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2033 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2034 */
2035 if (!pgmPoolIsPageLocked(pPage))
2036 break;
2037 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2038 pgmPoolCacheUsed(pPool, pPage);
2039 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2040 }
2041
2042 /*
2043 * Found a usable page, flush it and return.
2044 */
2045 int rc = pgmPoolFlushPage(pPool, pPage);
2046 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2047    /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2048 if (rc == VINF_SUCCESS)
2049 PGM_INVL_ALL_VCPU_TLBS(pVM);
2050 return rc;
2051}
2052
2053
2054/**
2055 * Checks if a kind mismatch is really a page being reused
2056 * or if it's just a normal remapping.
2057 *
2058 * @returns true if reused and the cached page (enmKind1) should be flushed
2059 * @returns false if not reused.
2060 * @param enmKind1 The kind of the cached page.
2061 * @param enmKind2 The kind of the requested page.
2062 */
2063static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2064{
2065 switch (enmKind1)
2066 {
2067 /*
2068 * Never reuse them. There is no remapping in non-paging mode.
2069 */
2070 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2071 case PGMPOOLKIND_32BIT_PD_PHYS:
2072 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2073 case PGMPOOLKIND_PAE_PD_PHYS:
2074 case PGMPOOLKIND_PAE_PDPT_PHYS:
2075 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2076 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2077 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2078 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2079 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2080 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2081 return false;
2082
2083 /*
2084 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2085 */
2086 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2087 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2088 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2089 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2090 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2091 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2092 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2093 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2094 case PGMPOOLKIND_32BIT_PD:
2095 case PGMPOOLKIND_PAE_PDPT:
2096 switch (enmKind2)
2097 {
2098 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2099 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2100 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2101 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2102 case PGMPOOLKIND_64BIT_PML4:
2103 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2104 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2105 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2106 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2107 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2108 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2109 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2110 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2111 return true;
2112 default:
2113 return false;
2114 }
2115
2116 /*
2117 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2118 */
2119 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2120 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2121 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2122 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2123 case PGMPOOLKIND_64BIT_PML4:
2124 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2125 switch (enmKind2)
2126 {
2127 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2128 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2129 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2130 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2131 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2132 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2133 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2134 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2135 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2136 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2137 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2138 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2139 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2140 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2141 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2142 return true;
2143 default:
2144 return false;
2145 }
2146
2147 /*
2148 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2149 */
2150 case PGMPOOLKIND_ROOT_NESTED:
2151 return false;
2152
2153 default:
2154 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2155 }
2156}
2157
2158
2159/**
2160 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2161 *
2162 * @returns VBox status code.
2163 * @retval VINF_PGM_CACHED_PAGE on success.
2164 * @retval VERR_FILE_NOT_FOUND if not found.
2165 * @param pPool The pool.
2166 * @param GCPhys The GC physical address of the page we're gonna shadow.
2167 * @param enmKind The kind of mapping.
2168 * @param enmAccess Access type for the mapping (only relevant for big pages)
2169 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2170 * @param iUser The shadow page pool index of the user table.
2171 * @param iUserTable The index into the user table (shadowed).
2172 * @param ppPage Where to store the pointer to the page.
2173 */
2174static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2175 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2176{
2177 /*
2178 * Look up the GCPhys in the hash.
2179 */
2180 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2181 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2182 if (i != NIL_PGMPOOL_IDX)
2183 {
2184 do
2185 {
2186 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2187 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2188 if (pPage->GCPhys == GCPhys)
2189 {
2190 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2191 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2192 && pPage->fA20Enabled == fA20Enabled)
2193 {
2194 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2195 * doesn't flush it in case there are no more free use records.
2196 */
2197 pgmPoolCacheUsed(pPool, pPage);
2198
2199 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2200 if (RT_SUCCESS(rc))
2201 {
2202 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2203 *ppPage = pPage;
2204 if (pPage->cModifications)
2205 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2206 STAM_COUNTER_INC(&pPool->StatCacheHits);
2207 return VINF_PGM_CACHED_PAGE;
2208 }
2209 return rc;
2210 }
2211
2212 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2213 {
2214 /*
2215 * The kind is different. In some cases we should now flush the page
2216 * as it has been reused, but in most cases this is normal remapping
2217 * of PDs as PT or big pages using the GCPhys field in a slightly
2218 * different way than the other kinds.
2219 */
2220 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2221 {
2222 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2223 pgmPoolFlushPage(pPool, pPage);
2224 break;
2225 }
2226 }
2227 }
2228
2229 /* next */
2230 i = pPage->iNext;
2231 } while (i != NIL_PGMPOOL_IDX);
2232 }
2233
2234 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2235 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2236 return VERR_FILE_NOT_FOUND;
2237}
2238
2239
2240/**
2241 * Inserts a page into the cache.
2242 *
2243 * @param pPool The pool.
2244 * @param pPage The cached page.
2245 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2246 */
2247static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2248{
2249 /*
2250 * Insert into the GCPhys hash if the page is fit for that.
2251 */
2252 Assert(!pPage->fCached);
2253 if (fCanBeCached)
2254 {
2255 pPage->fCached = true;
2256 pgmPoolHashInsert(pPool, pPage);
2257 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2258 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2259 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2260 }
2261 else
2262 {
2263 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2264 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2265 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2266 }
2267
2268 /*
2269 * Insert at the head of the age list.
2270 */
2271 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2272 pPage->iAgeNext = pPool->iAgeHead;
2273 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2274 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2275 else
2276 pPool->iAgeTail = pPage->idx;
2277 pPool->iAgeHead = pPage->idx;
2278}
2279
2280
2281/**
2282 * Flushes a cached page.
2283 *
2284 * @param pPool The pool.
2285 * @param pPage The cached page.
2286 */
2287static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2288{
2289 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2290
2291 /*
2292 * Remove the page from the hash.
2293 */
2294 if (pPage->fCached)
2295 {
2296 pPage->fCached = false;
2297 pgmPoolHashRemove(pPool, pPage);
2298 }
2299 else
2300 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2301
2302 /*
2303 * Remove it from the age list.
2304 */
2305 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2306 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2307 else
2308 pPool->iAgeTail = pPage->iAgePrev;
2309 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2310 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2311 else
2312 pPool->iAgeHead = pPage->iAgeNext;
2313 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2314 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2315}
2316
2317
2318/**
2319 * Looks for pages sharing the monitor.
2320 *
2321 * @returns Pointer to the head page.
2322 * @returns NULL if not found.
2323 * @param pPool The Pool
2324 * @param pNewPage The page which is going to be monitored.
2325 */
2326static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2327{
2328 /*
2329 * Look up the GCPhys in the hash.
2330 */
2331 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2332 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2333 if (i == NIL_PGMPOOL_IDX)
2334 return NULL;
2335 do
2336 {
2337 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2338 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2339 && pPage != pNewPage)
2340 {
2341 switch (pPage->enmKind)
2342 {
2343 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2344 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2345 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2346 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2347 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2348 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2349 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2350 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2351 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2352 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2353 case PGMPOOLKIND_64BIT_PML4:
2354 case PGMPOOLKIND_32BIT_PD:
2355 case PGMPOOLKIND_PAE_PDPT:
2356 {
2357 /* find the head */
2358 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2359 {
2360 Assert(pPage->iMonitoredPrev != pPage->idx);
2361 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2362 }
2363 return pPage;
2364 }
2365
2366 /* ignore, no monitoring. */
2367 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2368 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2369 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2370 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2371 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2372 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2373 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2374 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2375 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2376 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2377 case PGMPOOLKIND_ROOT_NESTED:
2378 case PGMPOOLKIND_PAE_PD_PHYS:
2379 case PGMPOOLKIND_PAE_PDPT_PHYS:
2380 case PGMPOOLKIND_32BIT_PD_PHYS:
2381 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2382 break;
2383 default:
2384 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2385 }
2386 }
2387
2388 /* next */
2389 i = pPage->iNext;
2390 } while (i != NIL_PGMPOOL_IDX);
2391 return NULL;
2392}
2393
2394
2395/**
2396 * Enables write monitoring of a guest page.
2397 *
2398 * @returns VBox status code.
2399 * @retval VINF_SUCCESS on success.
2400 * @param pPool The pool.
2401 * @param pPage The cached page.
2402 */
2403static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2404{
2405 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2406
2407 /*
2408 * Filter out the relevant kinds.
2409 */
2410 switch (pPage->enmKind)
2411 {
2412 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2413 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2414 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2415 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2416 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2417 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2418 case PGMPOOLKIND_64BIT_PML4:
2419 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2420 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2421 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2422 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2423 case PGMPOOLKIND_32BIT_PD:
2424 case PGMPOOLKIND_PAE_PDPT:
2425 break;
2426
2427 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2428 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2429 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2430 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2431 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2432 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2433 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2434 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2435 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2436 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2437 case PGMPOOLKIND_ROOT_NESTED:
2438 /* Nothing to monitor here. */
2439 return VINF_SUCCESS;
2440
2441 case PGMPOOLKIND_32BIT_PD_PHYS:
2442 case PGMPOOLKIND_PAE_PDPT_PHYS:
2443 case PGMPOOLKIND_PAE_PD_PHYS:
2444 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2445 /* Nothing to monitor here. */
2446 return VINF_SUCCESS;
2447 default:
2448 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2449 }
2450
2451 /*
2452 * Install handler.
2453 */
2454 int rc;
2455 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2456 if (pPageHead)
2457 {
2458 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2459 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2460
2461#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2462 if (pPageHead->fDirty)
2463 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2464#endif
2465
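            /* Another pool page already monitors this guest page: link this page into the existing
             * monitoring chain right after the head instead of registering a second handler. */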
2466 pPage->iMonitoredPrev = pPageHead->idx;
2467 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2468 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2469 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2470 pPageHead->iMonitoredNext = pPage->idx;
2471 rc = VINF_SUCCESS;
2472 }
2473 else
2474 {
2475 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2476 PVM pVM = pPool->CTX_SUFF(pVM);
2477 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2478 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2479 GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK,
2480 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2481 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2482 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2483 pPool->pszAccessHandler);
2484 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2485 * the heap size should suffice. */
2486 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2487 PVMCPU pVCpu = VMMGetCpu(pVM);
2488 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2489 }
2490 pPage->fMonitored = true;
2491 return rc;
2492}
2493
2494
2495/**
2496 * Disables write monitoring of a guest page.
2497 *
2498 * @returns VBox status code.
2499 * @retval VINF_SUCCESS on success.
2500 * @param pPool The pool.
2501 * @param pPage The cached page.
2502 */
2503static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2504{
2505 /*
2506 * Filter out the relevant kinds.
2507 */
2508 switch (pPage->enmKind)
2509 {
2510 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2511 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2512 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2513 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2514 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2515 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2516 case PGMPOOLKIND_64BIT_PML4:
2517 case PGMPOOLKIND_32BIT_PD:
2518 case PGMPOOLKIND_PAE_PDPT:
2519 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2520 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2521 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2522 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2523 break;
2524
2525 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2526 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2527 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2528 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2529 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2530 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2531 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2532 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2533 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2534 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2535 case PGMPOOLKIND_ROOT_NESTED:
2536 case PGMPOOLKIND_PAE_PD_PHYS:
2537 case PGMPOOLKIND_PAE_PDPT_PHYS:
2538 case PGMPOOLKIND_32BIT_PD_PHYS:
2539 /* Nothing to monitor here. */
2540 Assert(!pPage->fMonitored);
2541 return VINF_SUCCESS;
2542
2543 default:
2544 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2545 }
2546 Assert(pPage->fMonitored);
2547
2548 /*
2549 * Remove the page from the monitored list or uninstall it if last.
2550 */
2551 const PVM pVM = pPool->CTX_SUFF(pVM);
2552 int rc;
2553 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2554 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2555 {
2556 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2557 {
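                /* This page is the head of the monitoring chain: promote the next page to head and
                 * repoint the physical access handler's user data at it. */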
2558 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2559 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2560 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2561 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2562 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2563 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2564 pPool->pszAccessHandler);
2565 AssertFatalRCSuccess(rc);
2566 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2567 }
2568 else
2569 {
2570 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2571 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2572 {
2573 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2574 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2575 }
2576 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2577 rc = VINF_SUCCESS;
2578 }
2579 }
2580 else
2581 {
2582 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2583 AssertFatalRC(rc);
2584 PVMCPU pVCpu = VMMGetCpu(pVM);
2585 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2586 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2587 }
2588 pPage->fMonitored = false;
2589
2590 /*
2591 * Remove it from the list of modified pages (if in it).
2592 */
2593 pgmPoolMonitorModifiedRemove(pPool, pPage);
2594
2595 return rc;
2596}
2597
2598
2599/**
2600 * Inserts the page into the list of modified pages.
2601 *
2602 * @param pPool The pool.
2603 * @param pPage The page.
2604 */
2605void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2606{
2607 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2608 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2609 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2610 && pPool->iModifiedHead != pPage->idx,
2611 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2612 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2613 pPool->iModifiedHead, pPool->cModifiedPages));
2614
2615 pPage->iModifiedNext = pPool->iModifiedHead;
2616 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2617 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2618 pPool->iModifiedHead = pPage->idx;
2619 pPool->cModifiedPages++;
2620#ifdef VBOX_WITH_STATISTICS
2621 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2622 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2623#endif
2624}
2625
2626
2627/**
2628 * Removes the page from the list of modified pages and resets the
2629 * modification counter.
2630 *
2631 * @param pPool The pool.
2632 * @param pPage The page which is believed to be in the list of modified pages.
2633 */
2634static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2635{
2636 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2637 if (pPool->iModifiedHead == pPage->idx)
2638 {
2639 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2640 pPool->iModifiedHead = pPage->iModifiedNext;
2641 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2642 {
2643 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2644 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2645 }
2646 pPool->cModifiedPages--;
2647 }
2648 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2649 {
2650 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2651 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2652 {
2653 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2654 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2655 }
2656 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2657 pPool->cModifiedPages--;
2658 }
2659 else
2660 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2661 pPage->cModifications = 0;
2662}
2663
2664
2665/**
2666 * Zaps the list of modified pages, resetting their modification counters in the process.
2667 *
2668 * @param pVM The VM handle.
2669 */
2670static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2671{
2672 pgmLock(pVM);
2673 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2674 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2675
2676 unsigned cPages = 0; NOREF(cPages);
2677
2678#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2679 pgmPoolResetDirtyPages(pVM);
2680#endif
2681
2682 uint16_t idx = pPool->iModifiedHead;
2683 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2684 while (idx != NIL_PGMPOOL_IDX)
2685 {
2686 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2687 idx = pPage->iModifiedNext;
2688 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2689 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2690 pPage->cModifications = 0;
2691 Assert(++cPages);
2692 }
2693 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2694 pPool->cModifiedPages = 0;
2695 pgmUnlock(pVM);
2696}
2697
2698
2699/**
2700 * Handle SyncCR3 pool tasks
2701 *
2702 * @returns VBox status code.
2703 * @retval VINF_SUCCESS on success.
2704 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2705 * @param pVCpu The VMCPU handle.
2706 * @remark Should only be used when monitoring is available, thus placed in
2707 * the PGMPOOL_WITH_MONITORING #ifdef.
2708 */
2709int pgmPoolSyncCR3(PVMCPU pVCpu)
2710{
2711 PVM pVM = pVCpu->CTX_SUFF(pVM);
2712 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2713
2714 /*
2715 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2716 * Occasionally we will have to clear all the shadow page tables because we wanted
2717 * to monitor a page which was mapped by too many shadowed page tables. This operation
2718 * is sometimes referred to as a 'lightweight flush'.
2719 */
2720# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2721 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2722 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2723# else /* !IN_RING3 */
2724 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2725 {
2726 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2727 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2728
2729 /* Make sure all other VCPUs return to ring 3. */
2730 if (pVM->cCpus > 1)
2731 {
2732 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2733 PGM_INVL_ALL_VCPU_TLBS(pVM);
2734 }
2735 return VINF_PGM_SYNC_CR3;
2736 }
2737# endif /* !IN_RING3 */
2738 else
2739 {
2740 pgmPoolMonitorModifiedClearAll(pVM);
2741
2742 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2743 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2744 {
2745 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2746 return pgmPoolSyncCR3(pVCpu);
2747 }
2748 }
2749 return VINF_SUCCESS;
2750}
2751
2752
2753/**
2754 * Frees up at least one user entry.
2755 *
2756 * @returns VBox status code.
2757 * @retval VINF_SUCCESS if a user entry was successfully freed up.
2758 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2759 * @param pPool The pool.
2760 * @param iUser The user index.
2761 */
2762static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2763{
2764 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2765 /*
2766 * Just free cached pages in a braindead fashion.
2767 */
2768 /** @todo walk the age list backwards and free the first with usage. */
2769 int rc = VINF_SUCCESS;
2770 do
2771 {
2772 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2773 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2774 rc = rc2;
2775 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2776 return rc;
2777}
2778
2779
2780/**
2781 * Inserts a page into the cache.
2782 *
2783 * This will create a user node for the page, insert it into the GCPhys
2784 * hash, and insert it into the age list.
2785 *
2786 * @returns VBox status code.
2787 * @retval VINF_SUCCESS if successfully added.
2788 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2789 * @param pPool The pool.
2790 * @param pPage The cached page.
2791 * @param GCPhys The GC physical address of the page we're gonna shadow.
2792 * @param iUser The user index.
2793 * @param iUserTable The user table index.
2794 */
2795DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2796{
2797 int rc = VINF_SUCCESS;
2798 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2799
2800 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2801
2802#ifdef VBOX_STRICT
2803 /*
2804 * Check that the entry doesn't already exist.
2805 */
2806 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2807 {
2808 uint16_t i = pPage->iUserHead;
2809 do
2810 {
2811 Assert(i < pPool->cMaxUsers);
2812 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2813 i = paUsers[i].iNext;
2814 } while (i != NIL_PGMPOOL_USER_INDEX);
2815 }
2816#endif
2817
2818 /*
2819 * Find a free user node.
2820 */
2821 uint16_t i = pPool->iUserFreeHead;
2822 if (i == NIL_PGMPOOL_USER_INDEX)
2823 {
2824 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2825 if (RT_FAILURE(rc))
2826 return rc;
2827 i = pPool->iUserFreeHead;
2828 }
2829
2830 /*
2831 * Unlink the user node from the free list,
2832 * initialize and insert it into the user list.
2833 */
2834 pPool->iUserFreeHead = paUsers[i].iNext;
2835 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2836 paUsers[i].iUser = iUser;
2837 paUsers[i].iUserTable = iUserTable;
2838 pPage->iUserHead = i;
2839
2840 /*
2841 * Insert into cache and enable monitoring of the guest page if enabled.
2842 *
2843 * Until we implement caching of all levels, including the CR3 one, we'll
2844 * have to make sure we don't try monitor & cache any recursive reuse of
2845 * a monitored CR3 page. Because all Windows versions are doing this we'll
2846 * have to be able to do combined access monitoring, CR3 + PT and
2847 * PD + PT (guest PAE).
2848 *
2849 * Update:
2850 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2851 */
2852 const bool fCanBeMonitored = true;
2853 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2854 if (fCanBeMonitored)
2855 {
2856 rc = pgmPoolMonitorInsert(pPool, pPage);
2857 AssertRC(rc);
2858 }
2859 return rc;
2860}
2861
2862
2863/**
2864 * Adds a user reference to a page.
2865 *
2866 * This will move the page to the head of the age list.
2867 *
2868 * @returns VBox status code.
2869 * @retval VINF_SUCCESS if successfully added.
2870 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2871 * @param pPool The pool.
2872 * @param pPage The cached page.
2873 * @param iUser The user index.
2874 * @param iUserTable The user table.
2875 */
2876static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2877{
2878 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2879
2880 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2881
2882# ifdef VBOX_STRICT
2883 /*
2884 * Check that the entry doesn't already exist. We only allow multiple
2885 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2886 */
2887 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2888 {
2889 uint16_t i = pPage->iUserHead;
2890 do
2891 {
2892 Assert(i < pPool->cMaxUsers);
2893            AssertMsg(   iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3
2894                      || paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2895 i = paUsers[i].iNext;
2896 } while (i != NIL_PGMPOOL_USER_INDEX);
2897 }
2898# endif
2899
2900 /*
2901 * Allocate a user node.
2902 */
2903 uint16_t i = pPool->iUserFreeHead;
2904 if (i == NIL_PGMPOOL_USER_INDEX)
2905 {
2906 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2907 if (RT_FAILURE(rc))
2908 return rc;
2909 i = pPool->iUserFreeHead;
2910 }
2911 pPool->iUserFreeHead = paUsers[i].iNext;
2912
2913 /*
2914 * Initialize the user node and insert it.
2915 */
2916 paUsers[i].iNext = pPage->iUserHead;
2917 paUsers[i].iUser = iUser;
2918 paUsers[i].iUserTable = iUserTable;
2919 pPage->iUserHead = i;
2920
2921# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2922 if (pPage->fDirty)
2923 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2924# endif
2925
2926 /*
2927 * Tell the cache to update its replacement stats for this page.
2928 */
2929 pgmPoolCacheUsed(pPool, pPage);
2930 return VINF_SUCCESS;
2931}
2932
2933
2934/**
2935 * Frees a user record associated with a page.
2936 *
2937 * This does not clear the entry in the user table, it simply returns the
2938 * user record to the chain of free records.
2939 *
2940 * @param pPool The pool.
2941 * @param pPage The shadow page.
2942 * @param iUser The shadow page pool index of the user table.
2943 * @param iUserTable The index into the user table (shadowed).
2944 */
2945static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2946{
2947 /*
2948 * Unlink and free the specified user entry.
2949 */
2950 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2951
2952 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2953 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2954 uint16_t i = pPage->iUserHead;
2955 if ( i != NIL_PGMPOOL_USER_INDEX
2956 && paUsers[i].iUser == iUser
2957 && paUsers[i].iUserTable == iUserTable)
2958 {
2959 pPage->iUserHead = paUsers[i].iNext;
2960
2961 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2962 paUsers[i].iNext = pPool->iUserFreeHead;
2963 pPool->iUserFreeHead = i;
2964 return;
2965 }
2966
2967 /* General: Linear search. */
2968 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2969 while (i != NIL_PGMPOOL_USER_INDEX)
2970 {
2971 if ( paUsers[i].iUser == iUser
2972 && paUsers[i].iUserTable == iUserTable)
2973 {
2974 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2975 paUsers[iPrev].iNext = paUsers[i].iNext;
2976 else
2977 pPage->iUserHead = paUsers[i].iNext;
2978
2979 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2980 paUsers[i].iNext = pPool->iUserFreeHead;
2981 pPool->iUserFreeHead = i;
2982 return;
2983 }
2984 iPrev = i;
2985 i = paUsers[i].iNext;
2986 }
2987
2988 /* Fatal: didn't find it */
2989 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
2990 iUser, iUserTable, pPage->GCPhys));
2991}
2992
2993
2994/**
2995 * Gets the entry size of a shadow table.
2996 *
2997 * @param enmKind The kind of page.
2998 *
2999 * @returns The size of the entry in bytes. That is, 4 or 8.
3000 * @returns If the kind is not for a table, an assertion is raised and 0 is
3001 * returned.
3002 */
3003DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3004{
3005 switch (enmKind)
3006 {
3007 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3008 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3009 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3010 case PGMPOOLKIND_32BIT_PD:
3011 case PGMPOOLKIND_32BIT_PD_PHYS:
3012 return 4;
3013
3014 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3015 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3016 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3017 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3018 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3019 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3020 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3021 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3022 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3023 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3024 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3025 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3026 case PGMPOOLKIND_64BIT_PML4:
3027 case PGMPOOLKIND_PAE_PDPT:
3028 case PGMPOOLKIND_ROOT_NESTED:
3029 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3030 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3031 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3032 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3033 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3034 case PGMPOOLKIND_PAE_PD_PHYS:
3035 case PGMPOOLKIND_PAE_PDPT_PHYS:
3036 return 8;
3037
3038 default:
3039 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3040 }
3041}
3042
3043
3044/**
3045 * Gets the entry size of a guest table.
3046 *
3047 * @param enmKind The kind of page.
3048 *
3049 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3050 * @returns If the kind is not for a table, an assertion is raised and 0 is
3051 * returned.
3052 */
3053DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3054{
3055 switch (enmKind)
3056 {
3057 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3058 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3059 case PGMPOOLKIND_32BIT_PD:
3060 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3061 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3062 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3063 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3064 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3065 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3066 return 4;
3067
3068 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3069 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3070 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3071 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3072 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3073 case PGMPOOLKIND_64BIT_PML4:
3074 case PGMPOOLKIND_PAE_PDPT:
3075 return 8;
3076
3077 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3078 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3079 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3080 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3081 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3082 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3083 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3084 case PGMPOOLKIND_ROOT_NESTED:
3085 case PGMPOOLKIND_PAE_PD_PHYS:
3086 case PGMPOOLKIND_PAE_PDPT_PHYS:
3087 case PGMPOOLKIND_32BIT_PD_PHYS:
3088 /** @todo can we return 0? (nobody is calling this...) */
3089 AssertFailed();
3090 return 0;
3091
3092 default:
3093 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3094 }
3095}
3096
3097
3098/**
3099 * Checks one shadow page table entry for a mapping of a physical page.
3100 *
3101 * @returns true / false indicating removal of all relevant PTEs
3102 *
3103 * @param pVM The VM handle.
3104 * @param pPhysPage The guest page in question.
3105 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3106 * @param iShw The shadow page table.
3107 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3108 */
3109static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3110{
3111 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3112 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3113 bool fRet = false;
3114
3115 /*
3116 * Assert sanity.
3117 */
3118 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3119 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3120 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3121
3122 /*
3123 * Then, clear the actual mappings to the page in the shadow PT.
3124 */
3125 switch (pPage->enmKind)
3126 {
3127 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3128 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3129 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3130 {
3131 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3132 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3133 uint32_t u32AndMask = 0;
3134 uint32_t u32OrMask = 0;
3135
3136 if (!fFlushPTEs)
3137 {
3138 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3139 {
3140                    case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3141                    case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3142 u32OrMask = X86_PTE_RW;
3143 u32AndMask = UINT32_MAX;
3144 fRet = true;
3145 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3146 break;
3147
3148                    case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3149 u32OrMask = 0;
3150 u32AndMask = ~X86_PTE_RW;
3151 fRet = true;
3152 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3153 break;
3154 default:
3155 /* (shouldn't be here, will assert below) */
3156 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3157 break;
3158 }
3159 }
3160 else
3161 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3162
3163 /* Update the counter if we're removing references. */
3164 if (!u32AndMask)
3165 {
3166 Assert(pPage->cPresent );
3167 Assert(pPool->cPresent);
3168 pPage->cPresent--;
3169 pPool->cPresent--;
3170 }
3171
3172 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3173 {
3174 X86PTE Pte;
3175
3176 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3177 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3178 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3179 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3180
3181 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3182 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3183 return fRet;
3184 }
3185#ifdef LOG_ENABLED
3186 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3187 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3188 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3189 {
3190 Log(("i=%d cFound=%d\n", i, ++cFound));
3191 }
3192#endif
3193 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3194 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3195 break;
3196 }
3197
3198 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3199 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3200 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3201 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3202 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3203 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3204 {
3205 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3206 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3207 uint64_t u64OrMask = 0;
3208 uint64_t u64AndMask = 0;
3209
3210 if (!fFlushPTEs)
3211 {
3212 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3213 {
3214 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3215 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3216 u64OrMask = X86_PTE_RW;
3217 u64AndMask = UINT64_MAX;
3218 fRet = true;
3219 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3220 break;
3221
3222 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3223 u64OrMask = 0;
3224 u64AndMask = ~(uint64_t)X86_PTE_RW;
3225 fRet = true;
3226 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3227 break;
3228
3229 default:
3230 /* (shouldn't be here, will assert below) */
3231 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3232 break;
3233 }
3234 }
3235 else
3236 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3237
3238 /* Update the counter if we're removing references. */
3239 if (!u64AndMask)
3240 {
3241 Assert(pPage->cPresent);
3242 Assert(pPool->cPresent);
3243 pPage->cPresent--;
3244 pPool->cPresent--;
3245 }
3246
3247 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3248 {
3249 X86PTEPAE Pte;
3250
3251 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3252 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3253 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3254 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3255
3256 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3257 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3258 return fRet;
3259 }
3260#ifdef LOG_ENABLED
3261 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3262 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3263 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3264 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3265 Log(("i=%d cFound=%d\n", i, ++cFound));
3266#endif
3267 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3268 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3269 break;
3270 }
3271
3272#ifdef PGM_WITH_LARGE_PAGES
3273 /* Large page case only. */
3274 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3275 {
3276 Assert(pVM->pgm.s.fNestedPaging);
3277
3278 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3279 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3280
3281 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3282 {
3283 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3284 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3285 pPD->a[iPte].u = 0;
3286 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3287
3288 /* Update the counter as we're removing references. */
3289 Assert(pPage->cPresent);
3290 Assert(pPool->cPresent);
3291 pPage->cPresent--;
3292 pPool->cPresent--;
3293
3294 return fRet;
3295 }
3296# ifdef LOG_ENABLED
3297 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3298 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3299 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3300 Log(("i=%d cFound=%d\n", i, ++cFound));
3301# endif
3302 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3303 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3304 break;
3305 }
3306
3307 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3308 case PGMPOOLKIND_PAE_PD_PHYS:
3309 {
3310 Assert(pVM->pgm.s.fNestedPaging);
3311
3312 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3313 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3314
3315 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3316 {
3317 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3318 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3319 pPD->a[iPte].u = 0;
3320 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3321
3322 /* Update the counter as we're removing references. */
3323 Assert(pPage->cPresent);
3324 Assert(pPool->cPresent);
3325 pPage->cPresent--;
3326 pPool->cPresent--;
3327 return fRet;
3328 }
3329# ifdef LOG_ENABLED
3330 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3331 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3332 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3333 Log(("i=%d cFound=%d\n", i, ++cFound));
3334# endif
3335 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3336 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3337 break;
3338 }
3339#endif /* PGM_WITH_LARGE_PAGES */
3340
3341 default:
3342 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3343 }
3344
3345 /* not reached. */
3346#ifndef _MSC_VER
3347 return fRet;
3348#endif
3349}
3350
3351
3352/**
3353 * Scans one shadow page table for mappings of a physical page.
3354 *
3355 * @param pVM The VM handle.
3356 * @param pPhysPage The guest page in question.
3357 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3358 * @param iShw The shadow page table.
3359 */
3360static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3361{
3362 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3363
3364    /* We should only end up here when there's only one reference to this physical page. */
3365 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3366
3367 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3368 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3369 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3370 if (!fKeptPTEs)
3371 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3372 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3373}
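
/*
 * Note on the tracking word used above: PGM_PAGE_GET_TRACKING() returns a
 * 16-bit value built with PGMPOOL_TD_MAKE(cRefs, idx).  In the common single
 * reference case cRefs is 1, idx is the index of the one shadow page table
 * referencing the page, and the PTE index within that table is kept in the
 * PGMPAGE entry itself (PGM_PAGE_GET_PTE_INDEX).  When cRefs equals
 * PGMPOOL_TD_CREFS_PHYSEXT, idx is instead the head of a physical cross
 * reference extent list, or PGMPOOL_TD_IDX_OVERFLOWED once that list has
 * overflowed.
 */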
3374
3375
3376/**
3377 * Flushes a list of shadow page tables mapping the same physical page.
3378 *
3379 * @param pVM The VM handle.
3380 * @param pPhysPage The guest page in question.
3381 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3382 * @param iPhysExt The physical cross reference extent list to flush.
3383 */
3384static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3385{
3386 PGM_LOCK_ASSERT_OWNER(pVM);
3387 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3388 bool fKeepList = false;
3389
3390 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3391    Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3392
3393 const uint16_t iPhysExtStart = iPhysExt;
3394 PPGMPOOLPHYSEXT pPhysExt;
3395 do
3396 {
3397 Assert(iPhysExt < pPool->cMaxPhysExts);
3398 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3399 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3400 {
3401 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3402 {
3403 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3404 if (!fKeptPTEs)
3405 {
3406 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3407 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3408 }
3409 else
3410 fKeepList = true;
3411 }
3412 }
3413 /* next */
3414 iPhysExt = pPhysExt->iNext;
3415 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3416
3417 if (!fKeepList)
3418 {
3419 /* insert the list into the free list and clear the ram range entry. */
3420 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3421 pPool->iPhysExtFreeHead = iPhysExtStart;
3422 /* Invalidate the tracking data. */
3423 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3424 }
3425
3426 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3427}
3428
3429
3430/**
3431 * Flushes all shadow page table mappings of the given guest page.
3432 *
3433 * This is typically called when the host page backing the guest one has been
3434 * replaced or when the page protection was changed due to a guest access
3435 * caught by the monitoring.
3436 *
3437 * @returns VBox status code.
3438 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3439 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3440 * pool cleaning. FF and sync flags are set.
3441 *
3442 * @param pVM The VM handle.
3443 * @param GCPhysPage GC physical address of the page in question
3444 * @param pPhysPage The guest page in question.
3445 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3446 * @param   pfFlushTLBs This is set to @a true if the shadow TLBs should be
3447 *                      flushed; it is NOT touched if this isn't necessary.
3448 *                      The caller MUST initialize this to @a false.
3449 */
3450int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3451{
3452 PVMCPU pVCpu = VMMGetCpu(pVM);
3453 pgmLock(pVM);
3454 int rc = VINF_SUCCESS;
3455
3456#ifdef PGM_WITH_LARGE_PAGES
3457 /* Is this page part of a large page? */
3458 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3459 {
3460 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3461 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3462
3463 /* Fetch the large page base. */
3464 PPGMPAGE pLargePage;
3465 if (GCPhysBase != GCPhysPage)
3466 {
3467 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3468 AssertFatal(pLargePage);
3469 }
3470 else
3471 pLargePage = pPhysPage;
3472
3473 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3474
3475 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3476 {
3477 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3478 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3479 pVM->pgm.s.cLargePagesDisabled++;
3480
3481            /* Update the base page as *only* that one has a reference and there's only one PDE to clear. */
3482 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3483
3484 *pfFlushTLBs = true;
3485 pgmUnlock(pVM);
3486 return rc;
3487 }
3488 }
3489#else
3490 NOREF(GCPhysPage);
3491#endif /* PGM_WITH_LARGE_PAGES */
3492
3493 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3494 if (u16)
3495 {
3496 /*
3497 * The zero page is currently screwing up the tracking and we'll
3498 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3499 * is defined, zero pages won't normally be mapped. Some kind of solution
3500 * will be needed for this problem of course, but it will have to wait...
3501 */
3502 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3503 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3504 rc = VINF_PGM_GCPHYS_ALIASED;
3505 else
3506 {
3507# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3508 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3509 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3510 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3511# endif
3512
3513 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3514 {
3515 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3516 pgmPoolTrackFlushGCPhysPT(pVM,
3517 pPhysPage,
3518 fFlushPTEs,
3519 PGMPOOL_TD_GET_IDX(u16));
3520 }
3521 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3522 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3523 else
3524 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3525 *pfFlushTLBs = true;
3526
3527# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3528 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3529# endif
3530 }
3531 }
3532
3533 if (rc == VINF_PGM_GCPHYS_ALIASED)
3534 {
3535 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3536 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3537 rc = VINF_PGM_SYNC_CR3;
3538 }
3539 pgmUnlock(pVM);
3540 return rc;
3541}
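
/*
 * Usage sketch (illustrative only; the caller context shown here is an
 * assumption, not taken from this file):
 *
 *     bool fFlushTLBs = false;            // the API requires this to start out false
 *     int  rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysPage, pPhysPage,
 *                                        true, &fFlushTLBs);
 *     if (fFlushTLBs)
 *         PGM_INVL_ALL_VCPU_TLBS(pVM);    // flush the shadow TLBs on all VCPUs
 *     if (rc == VINF_PGM_SYNC_CR3)
 *     {   // the VMCPU_FF_PGM_SYNC_CR3 force action flag has already been set
 *     }
 */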
3542
3543
3544/**
3545 * Scans all shadow page tables for mappings of a physical page.
3546 *
3547 * This may be slow, but it's most likely more efficient than cleaning
3548 * out the entire page pool / cache.
3549 *
3550 * @returns VBox status code.
3551 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3552 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3553 * a page pool cleaning.
3554 *
3555 * @param pVM The VM handle.
3556 * @param pPhysPage The guest page in question.
3557 */
3558int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3559{
3560 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3561 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3562 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3563 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3564
3565 /*
3566 * There is a limit to what makes sense.
3567 */
3568 if ( pPool->cPresent > 1024
3569 && pVM->cCpus == 1)
3570 {
3571 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3572 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3573 return VINF_PGM_GCPHYS_ALIASED;
3574 }
3575
3576 /*
3577     * Iterate all the pages until we've encountered all those in use.
3578     * This is a simple but not quite optimal solution.
3579 */
3580 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3581 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3582 unsigned cLeft = pPool->cUsedPages;
3583 unsigned iPage = pPool->cCurPages;
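    /* Walk the pool pages from the top down; cLeft lets the loop stop as soon as
       every page currently in use has been visited. */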
3584 while (--iPage >= PGMPOOL_IDX_FIRST)
3585 {
3586 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3587 if ( pPage->GCPhys != NIL_RTGCPHYS
3588 && pPage->cPresent)
3589 {
3590 switch (pPage->enmKind)
3591 {
3592 /*
3593 * We only care about shadow page tables.
3594 */
3595 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3596 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3597 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3598 {
3599 unsigned cPresent = pPage->cPresent;
3600 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3601 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3602 if (pPT->a[i].n.u1Present)
3603 {
3604 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3605 {
3606 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3607 pPT->a[i].u = 0;
3608
3609 /* Update the counter as we're removing references. */
3610 Assert(pPage->cPresent);
3611 Assert(pPool->cPresent);
3612 pPage->cPresent--;
3613 pPool->cPresent--;
3614 }
3615 if (!--cPresent)
3616 break;
3617 }
3618 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3619 break;
3620 }
3621
3622 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3623 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3624 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3625 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3626 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3627 {
3628 unsigned cPresent = pPage->cPresent;
3629 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3630 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3631 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3632 {
3633 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3634 {
3635 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3636 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3637
3638 /* Update the counter as we're removing references. */
3639 Assert(pPage->cPresent);
3640 Assert(pPool->cPresent);
3641 pPage->cPresent--;
3642 pPool->cPresent--;
3643 }
3644 if (!--cPresent)
3645 break;
3646 }
3647 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3648 break;
3649 }
3650#ifndef IN_RC
3651 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3652 {
3653 unsigned cPresent = pPage->cPresent;
3654 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3655 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3656 if (pPT->a[i].n.u1Present)
3657 {
3658 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3659 {
3660 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3661 pPT->a[i].u = 0;
3662
3663 /* Update the counter as we're removing references. */
3664 Assert(pPage->cPresent);
3665 Assert(pPool->cPresent);
3666 pPage->cPresent--;
3667 pPool->cPresent--;
3668 }
3669 if (!--cPresent)
3670 break;
3671 }
3672 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3673 break;
3674 }
3675#endif
3676 }
3677 if (!--cLeft)
3678 break;
3679 }
3680 }
3681
3682 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3683 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3684
3685 /*
3686 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3687 */
3688 if (pPool->cPresent > 1024)
3689 {
3690 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3691 return VINF_PGM_GCPHYS_ALIASED;
3692 }
3693
3694 return VINF_SUCCESS;
3695}
3696
3697
3698/**
3699 * Clears the user entry in a user table.
3700 *
3701 * This is used to remove all references to a page when flushing it.
3702 */
3703static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3704{
3705 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3706 Assert(pUser->iUser < pPool->cCurPages);
3707 uint32_t iUserTable = pUser->iUserTable;
3708
3709 /*
3710 * Map the user page.
3711 */
3712 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3713 union
3714 {
3715 uint64_t *pau64;
3716 uint32_t *pau32;
3717 } u;
3718 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3719
3720 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3721
3722 /* Safety precaution in case we change the paging for other modes too in the future. */
3723 Assert(!pgmPoolIsPageLocked(pPage));
3724
3725#ifdef VBOX_STRICT
3726 /*
3727 * Some sanity checks.
3728 */
3729 switch (pUserPage->enmKind)
3730 {
3731 case PGMPOOLKIND_32BIT_PD:
3732 case PGMPOOLKIND_32BIT_PD_PHYS:
3733 Assert(iUserTable < X86_PG_ENTRIES);
3734 break;
3735 case PGMPOOLKIND_PAE_PDPT:
3736 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3737 case PGMPOOLKIND_PAE_PDPT_PHYS:
3738 Assert(iUserTable < 4);
3739 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3740 break;
3741 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3742 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3743 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3744 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3745 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3746 case PGMPOOLKIND_PAE_PD_PHYS:
3747 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3748 break;
3749 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3750 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3751 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3752 break;
3753 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3754 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3755 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3756 break;
3757 case PGMPOOLKIND_64BIT_PML4:
3758 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3759 /* GCPhys >> PAGE_SHIFT is the index here */
3760 break;
3761 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3762 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3763 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3764 break;
3765
3766 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3767 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3768 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3769 break;
3770
3771 case PGMPOOLKIND_ROOT_NESTED:
3772 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3773 break;
3774
3775 default:
3776 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3777 break;
3778 }
3779#endif /* VBOX_STRICT */
3780
3781 /*
3782 * Clear the entry in the user page.
3783 */
3784 switch (pUserPage->enmKind)
3785 {
3786 /* 32-bit entries */
3787 case PGMPOOLKIND_32BIT_PD:
3788 case PGMPOOLKIND_32BIT_PD_PHYS:
3789 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3790 break;
3791
3792 /* 64-bit entries */
3793 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3794 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3795 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3796 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3797 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3798#ifdef IN_RC
3799 /*
3800         * In 32-bit PAE mode we *must* invalidate the TLB when changing a
3801         * PDPT entry; the CPU fetches them only during CR3 load, so any
3802 * non-present PDPT will continue to cause page faults.
3803 */
3804 ASMReloadCR3();
3805 /* no break */
3806#endif
3807 case PGMPOOLKIND_PAE_PD_PHYS:
3808 case PGMPOOLKIND_PAE_PDPT_PHYS:
3809 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3810 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3811 case PGMPOOLKIND_64BIT_PML4:
3812 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3813 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3814 case PGMPOOLKIND_PAE_PDPT:
3815 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3816 case PGMPOOLKIND_ROOT_NESTED:
3817 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3818 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3819 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3820 break;
3821
3822 default:
3823 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3824 }
3825 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3826}
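
/*
 * Note: the user table entry is cleared with a single atomic store above;
 * presumably this is so that another EMT walking the same shadow structure
 * never observes a torn, half-written entry.
 */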
3827
3828
3829/**
3830 * Clears all users of a page.
3831 */
3832static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3833{
3834 /*
3835 * Free all the user records.
3836 */
3837 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3838
3839 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3840 uint16_t i = pPage->iUserHead;
3841 while (i != NIL_PGMPOOL_USER_INDEX)
3842 {
3843        /* Clear the entry in the user table. */
3844 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3845
3846 /* Free it. */
3847 const uint16_t iNext = paUsers[i].iNext;
3848 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3849 paUsers[i].iNext = pPool->iUserFreeHead;
3850 pPool->iUserFreeHead = i;
3851
3852 /* Next. */
3853 i = iNext;
3854 }
3855 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3856}
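
/*
 * Note: the user records unlinked above form a singly linked list anchored in
 * pPage->iUserHead; each freed record is pushed onto the pool's iUserFreeHead
 * free list so it can be handed out again when a new user is registered
 * (see pgmPoolTrackAddUser).
 */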
3857
3858
3859/**
3860 * Allocates a new physical cross reference extent.
3861 *
3862 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3863 * @param pVM The VM handle.
3864 * @param piPhysExt Where to store the phys ext index.
3865 */
3866PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3867{
3868 PGM_LOCK_ASSERT_OWNER(pVM);
3869 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3870 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3871 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3872 {
3873 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3874 return NULL;
3875 }
3876 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3877 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3878 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3879 *piPhysExt = iPhysExt;
3880 return pPhysExt;
3881}
3882
3883
3884/**
3885 * Frees a physical cross reference extent.
3886 *
3887 * @param pVM The VM handle.
3888 * @param iPhysExt The extent to free.
3889 */
3890void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3891{
3892 PGM_LOCK_ASSERT_OWNER(pVM);
3893 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3894 Assert(iPhysExt < pPool->cMaxPhysExts);
3895 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3896 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3897 {
3898 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3899 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3900 }
3901 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3902 pPool->iPhysExtFreeHead = iPhysExt;
3903}
3904
3905
3906/**
3907 * Frees a list of physical cross reference extents.
3908 *
3909 * @param pVM The VM handle.
3910 * @param   iPhysExt    The index of the first extent in the list to free.
3911 */
3912void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3913{
3914 PGM_LOCK_ASSERT_OWNER(pVM);
3915 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3916
3917 const uint16_t iPhysExtStart = iPhysExt;
3918 PPGMPOOLPHYSEXT pPhysExt;
3919 do
3920 {
3921 Assert(iPhysExt < pPool->cMaxPhysExts);
3922 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3923 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3924 {
3925 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3926 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3927 }
3928
3929 /* next */
3930 iPhysExt = pPhysExt->iNext;
3931 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3932
3933 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3934 pPool->iPhysExtFreeHead = iPhysExtStart;
3935}
3936
3937
3938/**
3939 * Insert a reference into a list of physical cross reference extents.
3940 *
3941 * @returns The new tracking data for PGMPAGE.
3942 *
3943 * @param pVM The VM handle.
3944 * @param iPhysExt The physical extent index of the list head.
3945 * @param iShwPT The shadow page table index.
3946 * @param iPte Page table entry
3947 *
3948 */
3949static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3950{
3951 PGM_LOCK_ASSERT_OWNER(pVM);
3952 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3953 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3954
3955 /*
3956 * Special common cases.
3957 */
3958 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3959 {
3960 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3961 paPhysExts[iPhysExt].apte[1] = iPte;
3962 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3963 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3964 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3965 }
3966 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3967 {
3968 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3969 paPhysExts[iPhysExt].apte[2] = iPte;
3970 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3971 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3972 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3973 }
3974 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3975
3976 /*
3977 * General treatment.
3978 */
3979 const uint16_t iPhysExtStart = iPhysExt;
3980 unsigned cMax = 15;
3981 for (;;)
3982 {
3983 Assert(iPhysExt < pPool->cMaxPhysExts);
3984 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3985 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3986 {
3987 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3988 paPhysExts[iPhysExt].apte[i] = iPte;
3989 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3990 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3991 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3992 }
3993 if (!--cMax)
3994 {
3995 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
3996 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3997 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3998 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3999 }
4000
4001 /* advance */
4002 iPhysExt = paPhysExts[iPhysExt].iNext;
4003 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4004 break;
4005 }
4006
4007 /*
4008 * Add another extent to the list.
4009 */
4010 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4011 if (!pNew)
4012 {
4013 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4014 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4015 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4016 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4017 }
4018 pNew->iNext = iPhysExtStart;
4019 pNew->aidx[0] = iShwPT;
4020 pNew->apte[0] = iPte;
4021 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4022 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4023}
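
/*
 * Note on the extent list handled above: each PGMPOOLPHYSEXT holds three
 * (shadow page index, PTE index) slot pairs (see the AssertCompile above) plus
 * an iNext link to the following extent.  The insert scans at most 15 extents
 * for a free slot; after that the whole list is returned to the free list and
 * the tracking word degrades to the PGMPOOL_TD_IDX_OVERFLOWED marker, meaning
 * individual references to the page are no longer tracked.
 */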
4024
4025
4026/**
4027 * Adds a reference to a guest physical page where extents are in use.
4028 *
4029 * @returns The new tracking data for PGMPAGE.
4030 *
4031 * @param pVM The VM handle.
4032 * @param pPhysPage Pointer to the aPages entry in the ram range.
4033 * @param u16 The ram range flags (top 16-bits).
4034 * @param iShwPT The shadow page table index.
4035 * @param iPte Page table entry
4036 */
4037uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4038{
4039 pgmLock(pVM);
4040 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4041 {
4042 /*
4043 * Convert to extent list.
4044 */
4045 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4046 uint16_t iPhysExt;
4047 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4048 if (pPhysExt)
4049 {
4050 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4051 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4052 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4053 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4054 pPhysExt->aidx[1] = iShwPT;
4055 pPhysExt->apte[1] = iPte;
4056 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4057 }
4058 else
4059 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4060 }
4061 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4062 {
4063 /*
4064 * Insert into the extent list.
4065 */
4066 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4067 }
4068 else
4069 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4070 pgmUnlock(pVM);
4071 return u16;
4072}
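
/*
 * Usage sketch (an assumption about the typical caller; not taken from this
 * file):
 *
 *     uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
 *     u16 = pgmPoolTrackPhysExtAddref(pVM, pPhysPage, u16, pPage->idx, iPte);
 *     PGM_PAGE_SET_TRACKING(pVM, pPhysPage, u16);   // store the updated tracking word
 */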
4073
4074
4075/**
4076 * Clear references to guest physical memory.
4077 *
4078 * @param pPool The pool.
4079 * @param pPage The page.
4080 * @param pPhysPage Pointer to the aPages entry in the ram range.
4081 * @param iPte Shadow PTE index
4082 */
4083void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4084{
4085 PVM pVM = pPool->CTX_SUFF(pVM);
4086 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4087 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4088
4089 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4090 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4091 {
4092 pgmLock(pVM);
4093
4094 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4095 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4096 do
4097 {
4098 Assert(iPhysExt < pPool->cMaxPhysExts);
4099
4100 /*
4101 * Look for the shadow page and check if it's all freed.
4102 */
4103 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4104 {
4105 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4106 && paPhysExts[iPhysExt].apte[i] == iPte)
4107 {
4108 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4109 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4110
4111 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4112 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4113 {
4114 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4115 pgmUnlock(pVM);
4116 return;
4117 }
4118
4119 /* we can free the node. */
4120 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4121 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4122 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4123 {
4124 /* lonely node */
4125 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4126 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4127 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4128 }
4129 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4130 {
4131 /* head */
4132 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4133 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4134 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4135 }
4136 else
4137 {
4138 /* in list */
4139 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4140 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4141 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4142 }
4143 iPhysExt = iPhysExtNext;
4144 pgmUnlock(pVM);
4145 return;
4146 }
4147 }
4148
4149 /* next */
4150 iPhysExtPrev = iPhysExt;
4151 iPhysExt = paPhysExts[iPhysExt].iNext;
4152 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4153
4154 pgmUnlock(pVM);
4155 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4156 }
4157 else /* nothing to do */
4158 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4159}
4160
4161/**
4162 * Clear references to guest physical memory.
4163 *
4164 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4165 * physical address is assumed to be correct, so the linear search can be
4166 * skipped and we can assert at an earlier point.
4167 *
4168 * @param pPool The pool.
4169 * @param pPage The page.
4170 * @param HCPhys The host physical address corresponding to the guest page.
4171 * @param GCPhys The guest physical address corresponding to HCPhys.
4172 * @param iPte Shadow PTE index
4173 */
4174static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4175{
4176 /*
4177 * Lookup the page and check if it checks out before derefing it.
4178 */
4179 PVM pVM = pPool->CTX_SUFF(pVM);
4180 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4181 if (pPhysPage)
4182 {
4183 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4184#ifdef LOG_ENABLED
4185 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4186 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4187#endif
4188 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4189 {
4190 Assert(pPage->cPresent);
4191 Assert(pPool->cPresent);
4192 pPage->cPresent--;
4193 pPool->cPresent--;
4194 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4195 return;
4196 }
4197
4198 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4199 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4200 }
4201 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4202}
4203
4204
4205/**
4206 * Clear references to guest physical memory.
4207 *
4208 * @param pPool The pool.
4209 * @param pPage The page.
4210 * @param HCPhys The host physical address corresponding to the guest page.
4211 * @param   GCPhysHint  The guest physical address which may correspond to HCPhys.
4212 * @param iPte Shadow pte index
4213 */
4214void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4215{
4216 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4217
4218 /*
4219 * Try the hint first.
4220 */
4221 RTHCPHYS HCPhysHinted;
4222 PVM pVM = pPool->CTX_SUFF(pVM);
4223 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4224 if (pPhysPage)
4225 {
4226 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4227 Assert(HCPhysHinted);
4228 if (HCPhysHinted == HCPhys)
4229 {
4230 Assert(pPage->cPresent);
4231 Assert(pPool->cPresent);
4232 pPage->cPresent--;
4233 pPool->cPresent--;
4234 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4235 return;
4236 }
4237 }
4238 else
4239 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
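        /* Poison value, so the failing assertion at the bottom prints something
           recognizable if we get there. */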
4240
4241 /*
4242 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4243 */
4244 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4245 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4246 while (pRam)
4247 {
4248 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4249 while (iPage-- > 0)
4250 {
4251 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4252 {
4253 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4254 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4255 Assert(pPage->cPresent);
4256 Assert(pPool->cPresent);
4257 pPage->cPresent--;
4258 pPool->cPresent--;
4259 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4260 return;
4261 }
4262 }
4263 pRam = pRam->CTX_SUFF(pNext);
4264 }
4265
4266 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4267}
4268
4269
4270/**
4271 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4272 *
4273 * @param pPool The pool.
4274 * @param pPage The page.
4275 * @param pShwPT The shadow page table (mapping of the page).
4276 * @param pGstPT The guest page table.
4277 */
4278DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4279{
4280 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4281 {
4282 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4283 if (pShwPT->a[i].n.u1Present)
4284 {
4285 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4286 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4287 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4288 if (!pPage->cPresent)
4289 break;
4290 }
4291 }
4292}
4293
4294
4295/**
4296 * Clear references to guest physical memory in a PAE / 32-bit page table.
4297 *
4298 * @param pPool The pool.
4299 * @param pPage The page.
4300 * @param pShwPT The shadow page table (mapping of the page).
4301 * @param pGstPT The guest page table (just a half one).
4302 */
4303DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4304{
4305 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4306 {
4307 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4308 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4309 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4310 {
4311 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4312 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4313 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4314 if (!pPage->cPresent)
4315 break;
4316 }
4317 }
4318}
4319
4320
4321/**
4322 * Clear references to guest physical memory in a PAE / PAE page table.
4323 *
4324 * @param pPool The pool.
4325 * @param pPage The page.
4326 * @param pShwPT The shadow page table (mapping of the page).
4327 * @param pGstPT The guest page table.
4328 */
4329DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4330{
4331 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4332 {
4333 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4334 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4335 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4336 {
4337            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4338 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4339 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
4340 if (!pPage->cPresent)
4341 break;
4342 }
4343 }
4344}
4345
4346
4347/**
4348 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4349 *
4350 * @param pPool The pool.
4351 * @param pPage The page.
4352 * @param pShwPT The shadow page table (mapping of the page).
4353 */
4354DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4355{
4356 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
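    /* There is no guest page table to consult for the 4MB/phys kinds, so the
       guest address is derived from the page's base GCPhys plus the entry offset. */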
4357 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4358 {
4359 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4360 if (pShwPT->a[i].n.u1Present)
4361 {
4362 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4363 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4364 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys, i);
4365 if (!pPage->cPresent)
4366 break;
4367 }
4368 }
4369}
4370
4371
4372/**
4373 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4374 *
4375 * @param pPool The pool.
4376 * @param pPage The page.
4377 * @param pShwPT The shadow page table (mapping of the page).
4378 */
4379DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4380{
4381 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4382 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4383 {
4384 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4385 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4386 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4387 {
4388 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4389 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4390 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys, i);
4391 if (!pPage->cPresent)
4392 break;
4393 }
4394 }
4395}
4396
4397
4398/**
4399 * Clear references to shadowed pages in an EPT page table.
4400 *
4401 * @param pPool The pool.
4402 * @param pPage The page.
4403 * @param   pShwPT      The shadow page table (mapping of the page).
4404 */
4405DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4406{
4407 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4408 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4409 {
4410 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4411 if (pShwPT->a[i].n.u1Present)
4412 {
4413 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4414 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4415 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys, i);
4416 if (!pPage->cPresent)
4417 break;
4418 }
4419 }
4420}
4421
4422
4423/**
4424 * Clear references to shadowed pages in a 32-bit page directory.
4425 *
4426 * @param pPool The pool.
4427 * @param pPage The page.
4428 * @param pShwPD The shadow page directory (mapping of the page).
4429 */
4430DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4431{
4432 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4433 {
4434 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4435 if ( pShwPD->a[i].n.u1Present
4436 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4437 )
4438 {
4439 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4440 if (pSubPage)
4441 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4442 else
4443 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4444 }
4445 }
4446}
4447
4448
4449/**
4450 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4451 *
4452 * @param pPool The pool.
4453 * @param pPage The page.
4454 * @param pShwPD The shadow page directory (mapping of the page).
4455 */
4456DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4457{
4458 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4459 {
4460 if ( pShwPD->a[i].n.u1Present
4461 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4462 {
4463#ifdef PGM_WITH_LARGE_PAGES
4464 if (pShwPD->a[i].b.u1Size)
4465 {
4466 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4467 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4468 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */, i);
4469 }
4470 else
4471#endif
4472 {
4473 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4474 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4475 if (pSubPage)
4476 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4477 else
4478 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4479 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4480 }
4481 }
4482 }
4483}
4484
4485
4486/**
4487 * Clear references to shadowed pages in a PAE page directory pointer table.
4488 *
4489 * @param pPool The pool.
4490 * @param pPage The page.
4491 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4492 */
4493DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4494{
4495 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4496 {
4497 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4498 if ( pShwPDPT->a[i].n.u1Present
4499 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4500 )
4501 {
4502 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4503 if (pSubPage)
4504 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4505 else
4506 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4507 }
4508 }
4509}
4510
4511
4512/**
4513 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4514 *
4515 * @param pPool The pool.
4516 * @param pPage The page.
4517 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4518 */
4519DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4520{
4521 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4522 {
4523 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4524 if (pShwPDPT->a[i].n.u1Present)
4525 {
4526 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4527 if (pSubPage)
4528 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4529 else
4530 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4531 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4532 }
4533 }
4534}
4535
4536
4537/**
4538 * Clear references to shadowed pages in a 64-bit level 4 page table.
4539 *
4540 * @param pPool The pool.
4541 * @param pPage The page.
4542 * @param   pShwPML4    The shadow page map level 4 table (mapping of the page).
4543 */
4544DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4545{
4546 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4547 {
4548 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4549 if (pShwPML4->a[i].n.u1Present)
4550 {
4551 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4552 if (pSubPage)
4553 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4554 else
4555 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4556 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4557 }
4558 }
4559}
4560
4561
4562/**
4563 * Clear references to shadowed pages in an EPT page directory.
4564 *
4565 * @param pPool The pool.
4566 * @param pPage The page.
4567 * @param pShwPD The shadow page directory (mapping of the page).
4568 */
4569DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4570{
4571 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4572 {
4573 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4574 if (pShwPD->a[i].n.u1Present)
4575 {
4576#ifdef PGM_WITH_LARGE_PAGES
4577 if (pShwPD->a[i].b.u1Size)
4578 {
4579 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4580 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4581 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */, i);
4582 }
4583 else
4584#endif
4585 {
4586 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4587 if (pSubPage)
4588 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4589 else
4590 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4591 }
4592 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4593 }
4594 }
4595}
4596
4597
4598/**
4599 * Clear references to shadowed pages in an EPT page directory pointer table.
4600 *
4601 * @param pPool The pool.
4602 * @param pPage The page.
4603 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4604 */
4605DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4606{
4607 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4608 {
4609 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4610 if (pShwPDPT->a[i].n.u1Present)
4611 {
4612 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4613 if (pSubPage)
4614 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4615 else
4616 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4617 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4618 }
4619 }
4620}
4621
4622
4623/**
4624 * Clears all references made by this page.
4625 *
4626 * This includes other shadow pages and GC physical addresses.
4627 *
4628 * @param pPool The pool.
4629 * @param pPage The page.
4630 */
4631static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4632{
4633 /*
4634 * Map the shadow page and take action according to the page kind.
4635 */
4636 PVM pVM = pPool->CTX_SUFF(pVM);
4637 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4638 switch (pPage->enmKind)
4639 {
4640 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4641 {
4642 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4643 void *pvGst;
4644 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4645 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4646 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4647 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4648 break;
4649 }
4650
4651 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4652 {
4653 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4654 void *pvGst;
4655 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4656 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4657 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4658 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4659 break;
4660 }
4661
4662 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4663 {
4664 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4665 void *pvGst;
4666 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4667 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4668 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4669 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4670 break;
4671 }
4672
4673 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4674 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4675 {
4676 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4677 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4678 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4679 break;
4680 }
4681
4682 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4683 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4684 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4685 {
4686 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4687 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4688 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4689 break;
4690 }
4691
4692 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4693 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4694 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4695 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4696 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4697 case PGMPOOLKIND_PAE_PD_PHYS:
4698 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4699 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4700 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4701 break;
4702
4703 case PGMPOOLKIND_32BIT_PD_PHYS:
4704 case PGMPOOLKIND_32BIT_PD:
4705 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4706 break;
4707
4708 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4709 case PGMPOOLKIND_PAE_PDPT:
4710 case PGMPOOLKIND_PAE_PDPT_PHYS:
4711 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4712 break;
4713
4714 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4715 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4716 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4717 break;
4718
4719 case PGMPOOLKIND_64BIT_PML4:
4720 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4721 break;
4722
4723 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4724 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4725 break;
4726
4727 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4728 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4729 break;
4730
4731 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4732 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4733 break;
4734
4735 default:
4736 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4737 }
4738
4739    /* Paranoia: clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4740 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4741 ASMMemZeroPage(pvShw);
4742 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4743 pPage->fZeroed = true;
4744 Assert(!pPage->cPresent);
4745 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4746}
4747
4748
4749/**
4750 * Flushes a pool page.
4751 *
4752 * This moves the page to the free list after removing all user references to it.
4753 *
4754 * @returns VBox status code.
4755 * @retval VINF_SUCCESS on success.
4756 * @param pPool The pool.
4757 * @param   pPage       The shadow page.
4758 * @param   fFlush      Flush the TLBs when required (should only be false in very specific use cases!!)
4759 */
4760int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4761{
4762 PVM pVM = pPool->CTX_SUFF(pVM);
4763 bool fFlushRequired = false;
4764
4765 int rc = VINF_SUCCESS;
4766 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4767 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4768 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4769
4770 /*
4771 * Quietly reject any attempts at flushing any of the special root pages.
4772 */
4773 if (pPage->idx < PGMPOOL_IDX_FIRST)
4774 {
4775 AssertFailed(); /* can no longer happen */
4776 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4777 return VINF_SUCCESS;
4778 }
4779
4780 pgmLock(pVM);
4781
4782 /*
4783 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4784 */
4785 if (pgmPoolIsPageLocked(pPage))
4786 {
4787 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4788 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4789 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4790 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4791 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4792 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4793 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4794 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4795 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4796 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4797 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4798 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4799 pgmUnlock(pVM);
4800 return VINF_SUCCESS;
4801 }
4802
4803#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4804 /* Start a subset so we won't run out of mapping space. */
4805 PVMCPU pVCpu = VMMGetCpu(pVM);
4806 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4807#endif
4808
4809 /*
4810 * Mark the page as being in need of an ASMMemZeroPage().
4811 */
4812 pPage->fZeroed = false;
4813
4814#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4815 if (pPage->fDirty)
4816 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4817#endif
4818
4819    /* If there are any users of this table, then we *must* issue a TLB flush on all VCPUs. */
4820 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4821 fFlushRequired = true;
4822
4823 /*
4824 * Clear the page.
4825 */
4826 pgmPoolTrackClearPageUsers(pPool, pPage);
4827 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4828 pgmPoolTrackDeref(pPool, pPage);
4829 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4830
4831 /*
4832 * Flush it from the cache.
4833 */
4834 pgmPoolCacheFlushPage(pPool, pPage);
4835
4836#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4837 /* Heavy stuff done. */
4838 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4839#endif
4840
4841 /*
4842 * Deregister the monitoring.
4843 */
4844 if (pPage->fMonitored)
4845 rc = pgmPoolMonitorFlush(pPool, pPage);
4846
4847 /*
4848 * Free the page.
4849 */
4850 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4851 pPage->iNext = pPool->iFreeHead;
4852 pPool->iFreeHead = pPage->idx;
4853 pPage->enmKind = PGMPOOLKIND_FREE;
4854 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4855 pPage->GCPhys = NIL_RTGCPHYS;
4856 pPage->fReusedFlushPending = false;
4857
4858 pPool->cUsedPages--;
4859
4860 /* Flush the TLBs of all VCPUs if required. */
4861 if ( fFlushRequired
4862 && fFlush)
4863 {
4864 PGM_INVL_ALL_VCPU_TLBS(pVM);
4865 }
4866
4867 pgmUnlock(pVM);
4868 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4869 return rc;
4870}
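/*
 * Note on the flush sequence above: pgmPoolFlushPage clears the user entries
 * (pgmPoolTrackClearPageUsers), drops the references the shadow page itself holds
 * (pgmPoolTrackDeref), evicts the page from the GCPhys cache (pgmPoolCacheFlushPage),
 * deregisters write monitoring (pgmPoolMonitorFlush) and finally links the page onto
 * the free list. A minimal illustrative call, assuming the caller has already looked
 * the page up (e.g. via pgmPoolGetPage), is:
 *
 *     int rc = pgmPoolFlushPage(pPool, pPage, true);   // fFlush=true: flush all VCPU TLBs if needed
 *
 * Passing fFlush=false only defers the cross-VCPU TLB flush to the caller and, per the
 * doc comment, should be reserved for very specific use cases.
 */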
4871
4872
4873/**
4874 * Frees a usage of a pool page.
4875 *
4876 * The caller is responsible for updating the user table so that it no longer
4877 * references the shadow page.
4878 *
4879 * @param pPool The pool.
4880 * @param pPage The shadow page.
4881 * @param iUser The shadow page pool index of the user table.
4882 * @param iUserTable The index into the user table (shadowed).
4883 */
4884void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4885{
4886 PVM pVM = pPool->CTX_SUFF(pVM);
4887
4888 STAM_PROFILE_START(&pPool->StatFree, a);
4889 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4890 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4891 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4892 pgmLock(pVM);
4893 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4894 if (!pPage->fCached)
4895 pgmPoolFlushPage(pPool, pPage);
4896 pgmUnlock(pVM);
4897 STAM_PROFILE_STOP(&pPool->StatFree, a);
4898}
4899
4900
4901/**
4902 * Makes one or more free pages available.
4903 *
4904 * @returns VBox status code.
4905 * @retval VINF_SUCCESS on success.
4906 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4907 *
4908 * @param pPool The pool.
4909 * @param enmKind Page table kind.
4910 * @param iUser The user of the page.
4911 */
4912static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4913{
4914 PVM pVM = pPool->CTX_SUFF(pVM);
4915 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%d\n", iUser));
4916 NOREF(enmKind);
4917
4918 /*
4919 * If the pool isn't fully grown yet, expand it.
4920 */
4921 if ( pPool->cCurPages < pPool->cMaxPages
4922#if defined(IN_RC)
4923 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4924 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4925 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4926#endif
4927 )
4928 {
4929 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4930#ifdef IN_RING3
4931 int rc = PGMR3PoolGrow(pVM);
4932#else
4933 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4934#endif
4935 if (RT_FAILURE(rc))
4936 return rc;
4937 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4938 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4939 return VINF_SUCCESS;
4940 }
4941
4942 /*
4943 * Free one cached page.
4944 */
4945 return pgmPoolCacheFreeOne(pPool, iUser);
4946}
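/*
 * In short: when the free list is empty, pgmPoolMakeMoreFreePages first tries to grow
 * the pool (directly via PGMR3PoolGrow in ring-3, through a ring-3 call otherwise) and
 * only falls back to evicting a cached page via pgmPoolCacheFreeOne when the pool is
 * already at cMaxPages or growing did not yield a free page. A rough sketch of the
 * caller side, as done by pgmPoolAlloc below:
 *
 *     if (pPool->iFreeHead == NIL_PGMPOOL_IDX)
 *     {
 *         int rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
 *         if (RT_FAILURE(rc))
 *             return rc;   // e.g. VERR_PGM_POOL_FLUSHED
 *     }
 */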
4947
4948
4949/**
4950 * Allocates a page from the pool.
4951 *
4952 * This page may actually be a cached page and not in need of any processing
4953 * on the caller's part.
4954 *
4955 * @returns VBox status code.
4956 * @retval VINF_SUCCESS if a NEW page was allocated.
4957 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4958 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4959 *
4960 * @param pVM The VM handle.
4961 * @param GCPhys The GC physical address of the page we're going to shadow.
4962 * For 4MB and 2MB PD entries, it's the first address the
4963 * shadow PT is covering.
4964 * @param enmKind The kind of mapping.
4965 * @param enmAccess Access type for the mapping (only relevant for big pages)
4966 * @param fA20Enabled Whether the A20 gate is enabled or not.
4967 * @param iUser The shadow page pool index of the user table.
4968 * @param iUserTable The index into the user table (shadowed).
4969 * @param fLockPage Lock the page
4970 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4971 */
4972int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
4973 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
4974{
4975 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4976 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4977 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4978 *ppPage = NULL;
4979 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4980 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4981 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4982
4983 pgmLock(pVM);
4984
4985 if (pPool->fCacheEnabled)
4986 {
4987 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
4988 if (RT_SUCCESS(rc2))
4989 {
4990 if (fLockPage)
4991 pgmPoolLockPage(pPool, *ppPage);
4992 pgmUnlock(pVM);
4993 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4994 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4995 return rc2;
4996 }
4997 }
4998
4999 /*
5000 * Allocate a new one.
5001 */
5002 int rc = VINF_SUCCESS;
5003 uint16_t iNew = pPool->iFreeHead;
5004 if (iNew == NIL_PGMPOOL_IDX)
5005 {
5006 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5007 if (RT_FAILURE(rc))
5008 {
5009 pgmUnlock(pVM);
5010 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5011 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5012 return rc;
5013 }
5014 iNew = pPool->iFreeHead;
5015 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5016 }
5017
5018 /* unlink the free head */
5019 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5020 pPool->iFreeHead = pPage->iNext;
5021 pPage->iNext = NIL_PGMPOOL_IDX;
5022
5023 /*
5024 * Initialize it.
5025 */
5026 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5027 pPage->enmKind = enmKind;
5028 pPage->enmAccess = enmAccess;
5029 pPage->GCPhys = GCPhys;
5030 pPage->fA20Enabled = fA20Enabled;
5031 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5032 pPage->fMonitored = false;
5033 pPage->fCached = false;
5034 pPage->fDirty = false;
5035 pPage->fReusedFlushPending = false;
5036 pPage->cModifications = 0;
5037 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5038 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5039 pPage->cPresent = 0;
5040 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5041 pPage->idxDirtyEntry = 0;
5042 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5043 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5044 pPage->cLastAccessHandler = 0;
5045 pPage->cLocked = 0;
5046# ifdef VBOX_STRICT
5047 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5048# endif
5049
5050 /*
5051 * Insert into the tracking and cache. If this fails, free the page.
5052 */
5053 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5054 if (RT_FAILURE(rc3))
5055 {
5056 pPool->cUsedPages--;
5057 pPage->enmKind = PGMPOOLKIND_FREE;
5058 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5059 pPage->GCPhys = NIL_RTGCPHYS;
5060 pPage->iNext = pPool->iFreeHead;
5061 pPool->iFreeHead = pPage->idx;
5062 pgmUnlock(pVM);
5063 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5064 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5065 return rc3;
5066 }
5067
5068 /*
5069 * Commit the allocation, clear the page and return.
5070 */
5071#ifdef VBOX_WITH_STATISTICS
5072 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5073 pPool->cUsedPagesHigh = pPool->cUsedPages;
5074#endif
5075
5076 if (!pPage->fZeroed)
5077 {
5078 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5079 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5080 ASMMemZeroPage(pv);
5081 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5082 }
5083
5084 *ppPage = pPage;
5085 if (fLockPage)
5086 pgmPoolLockPage(pPool, pPage);
5087 pgmUnlock(pVM);
5088 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5089 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5090 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5091 return rc;
5092}
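/*
 * Illustrative only (none of the placeholder names below come from this file): a caller
 * shadowing a PAE guest page table might allocate like this, where GCPhysPT is the guest
 * PT address, pShwPde the pool page of the owning PD and iPde the PDE index used for
 * user tracking:
 *
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
 *                           true,   // fA20Enabled
 *                           pShwPde->idx, iPde,
 *                           false,  // fLockPage
 *                           &pShwPage);
 *
 * VINF_SUCCESS means a freshly allocated (and zeroed) page; VINF_PGM_CACHED_PAGE means an
 * existing shadow page was found in the cache and can be reused as-is.
 */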
5093
5094
5095/**
5096 * Frees a usage of a pool page.
5097 *
5098 * @param pVM The VM handle.
5099 * @param HCPhys The HC physical address of the shadow page.
5100 * @param iUser The shadow page pool index of the user table.
5101 * @param iUserTable The index into the user table (shadowed).
5102 */
5103void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5104{
5105 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5106 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5107 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5108}
5109
5110
5111/**
5112 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5113 *
5114 * @returns Pointer to the shadow page structure.
5115 * @param pPool The pool.
5116 * @param HCPhys The HC physical address of the shadow page.
5117 */
5118PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5119{
5120 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5121
5122 /*
5123 * Look up the page.
5124 */
5125 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5126
5127 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5128 return pPage;
5129}
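/*
 * The lookup above uses the pool's AVL tree keyed on the host physical address of the
 * shadow pages (HCPhysTree); masking with X86_PTE_PAE_PG_MASK strips the low attribute
 * bits, so the page-frame part of a raw shadow table entry value should presumably be
 * enough to find the page.
 */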
5130
5131
5132/**
5133 * Internal worker for finding a page for debugging purposes, no assertions.
5134 *
5135 * @returns Pointer to the shadow page structure. NULL if not found.
5136 * @param pPool The pool.
5137 * @param HCPhys The HC physical address of the shadow page.
5138 */
5139PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5140{
5141 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5142 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5143}
5144
5145#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5146
5147/**
5148 * Flushes the specified page if present.
5149 *
5150 * @param pVM The VM handle.
5151 * @param GCPhys Guest physical address of the page to flush.
5152 */
5153void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5154{
5155 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5156
5157 VM_ASSERT_EMT(pVM);
5158
5159 /*
5160 * Look up the GCPhys in the hash.
5161 */
5162 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5163 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5164 if (i == NIL_PGMPOOL_IDX)
5165 return;
5166
5167 do
5168 {
5169 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5170 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5171 {
5172 switch (pPage->enmKind)
5173 {
5174 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5175 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5176 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5177 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5178 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5179 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5180 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5181 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5182 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5183 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5184 case PGMPOOLKIND_64BIT_PML4:
5185 case PGMPOOLKIND_32BIT_PD:
5186 case PGMPOOLKIND_PAE_PDPT:
5187 {
5188 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5189#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5190 if (pPage->fDirty)
5191 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5192 else
5193#endif
5194 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5195 Assert(!pgmPoolIsPageLocked(pPage));
5196 pgmPoolMonitorChainFlush(pPool, pPage);
5197 return;
5198 }
5199
5200 /* ignore, no monitoring. */
5201 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5202 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5203 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5204 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5205 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5206 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5207 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5208 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5209 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5210 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5211 case PGMPOOLKIND_ROOT_NESTED:
5212 case PGMPOOLKIND_PAE_PD_PHYS:
5213 case PGMPOOLKIND_PAE_PDPT_PHYS:
5214 case PGMPOOLKIND_32BIT_PD_PHYS:
5215 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5216 break;
5217
5218 default:
5219 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5220 }
5221 }
5222
5223 /* next */
5224 i = pPage->iNext;
5225 } while (i != NIL_PGMPOOL_IDX);
5226 return;
5227}
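/*
 * Summary: the function hashes GCPhys into aiHash (PGMPOOL_HASH) and walks the chain via
 * iNext. Monitored page table / directory kinds get their whole monitor chain flushed
 * with pgmPoolMonitorChainFlush, while the *_FOR_PHYS, big-page and nested-root kinds are
 * skipped because they are not write-monitored. An illustrative call (GCPhysPage is a
 * placeholder name, not from this file) would simply be:
 *
 *     pgmPoolFlushPageByGCPhys(pVM, GCPhysPage);
 */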
5228
5229#endif /* IN_RING3 */
5230#ifdef IN_RING3
5231
5232/**
5233 * Reset CPU on hot plugging.
5234 *
5235 * @param pVM The VM handle.
5236 * @param pVCpu The virtual CPU.
5237 */
5238void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5239{
5240 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5241
5242 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5243 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5244 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5245}
5246
5247
5248/**
5249 * Flushes the entire cache.
5250 *
5251 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5252 * this and will execute this CR3 flush.
5253 *
5254 * @param pVM The VM handle.
5255 */
5256void pgmR3PoolReset(PVM pVM)
5257{
5258 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5259
5260 PGM_LOCK_ASSERT_OWNER(pVM);
5261 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5262 LogFlow(("pgmR3PoolReset:\n"));
5263
5264 /*
5265 * If there are no pages in the pool, there is nothing to do.
5266 */
5267 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5268 {
5269 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5270 return;
5271 }
5272
5273 /*
5274 * Exit the shadow mode since we're going to clear everything,
5275 * including the root page.
5276 */
5277 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5278 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5279
5280 /*
5281 * Nuke the free list and reinsert all pages into it.
5282 */
5283 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5284 {
5285 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5286
5287 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5288 if (pPage->fMonitored)
5289 pgmPoolMonitorFlush(pPool, pPage);
5290 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5291 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5292 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5293 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5294 pPage->cModifications = 0;
5295 pPage->GCPhys = NIL_RTGCPHYS;
5296 pPage->enmKind = PGMPOOLKIND_FREE;
5297 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5298 Assert(pPage->idx == i);
5299 pPage->iNext = i + 1;
5300 pPage->fA20Enabled = true;
5301 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5302 pPage->fSeenNonGlobal = false;
5303 pPage->fMonitored = false;
5304 pPage->fDirty = false;
5305 pPage->fCached = false;
5306 pPage->fReusedFlushPending = false;
5307 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5308 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5309 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5310 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5311 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5312 pPage->cLastAccessHandler = 0;
5313 pPage->cLocked = 0;
5314#ifdef VBOX_STRICT
5315 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5316#endif
5317 }
5318 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5319 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5320 pPool->cUsedPages = 0;
5321
5322 /*
5323 * Zap and reinitialize the user records.
5324 */
5325 pPool->cPresent = 0;
5326 pPool->iUserFreeHead = 0;
5327 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5328 const unsigned cMaxUsers = pPool->cMaxUsers;
5329 for (unsigned i = 0; i < cMaxUsers; i++)
5330 {
5331 paUsers[i].iNext = i + 1;
5332 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5333 paUsers[i].iUserTable = 0xfffffffe;
5334 }
5335 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5336
5337 /*
5338 * Clear all the GCPhys links and rebuild the phys ext free list.
5339 */
5340 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5341 pRam;
5342 pRam = pRam->CTX_SUFF(pNext))
5343 {
5344 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5345 while (iPage-- > 0)
5346 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5347 }
5348
5349 pPool->iPhysExtFreeHead = 0;
5350 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5351 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5352 for (unsigned i = 0; i < cMaxPhysExts; i++)
5353 {
5354 paPhysExts[i].iNext = i + 1;
5355 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5356 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5357 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5358 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5359 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5360 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5361 }
5362 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5363
5364 /*
5365 * Just zap the modified list.
5366 */
5367 pPool->cModifiedPages = 0;
5368 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5369
5370 /*
5371 * Clear the GCPhys hash and the age list.
5372 */
5373 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5374 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5375 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5376 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5377
5378#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5379 /* Clear all dirty pages. */
5380 pPool->idxFreeDirtyPage = 0;
5381 pPool->cDirtyPages = 0;
5382 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5383 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5384#endif
5385
5386 /*
5387 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5388 */
5389 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5390 {
5391 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5392 pPage->iNext = NIL_PGMPOOL_IDX;
5393 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5394 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5395 pPage->cModifications = 0;
5396 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5397 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5398 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5399 if (pPage->fMonitored)
5400 {
5401 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
5402 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5403 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5404 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5405 pPool->pszAccessHandler);
5406 AssertFatalRCSuccess(rc);
5407 pgmPoolHashInsert(pPool, pPage);
5408 }
5409 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5410 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5411 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5412 }
5413
5414 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5415 {
5416 /*
5417 * Re-enter the shadowing mode and assert Sync CR3 FF.
5418 */
5419 PVMCPU pVCpu = &pVM->aCpus[i];
5420 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5421 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5422 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5423 }
5424
5425 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5426}
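/*
 * Recap of the reset sequence: exit shadow mode on every VCPU, rebuild the free list from
 * all pool pages (flushing any monitoring on the way), reset the user and phys-ext
 * records, clear the per-page tracking data in every RAM range, zap the modified list,
 * the hash and the age lists (and, when PGMPOOL_WITH_OPTIMIZED_DIRTY_PT is defined, the
 * dirty pages), refresh the access-handler callbacks for the still-monitored special
 * pages, and finally re-enter shadow mode and force VMCPU_FF_PGM_SYNC_CR3 plus a TLB
 * flush on each VCPU.
 */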
5427
5428#endif /* IN_RING3 */
5429
5430#ifdef LOG_ENABLED
5431/**
5432 * Stringifies a PGMPOOLKIND value.
5433 */
5434static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5435{
5436 switch ((PGMPOOLKIND)enmKind)
5437 {
5438 case PGMPOOLKIND_INVALID:
5439 return "PGMPOOLKIND_INVALID";
5440 case PGMPOOLKIND_FREE:
5441 return "PGMPOOLKIND_FREE";
5442 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5443 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5444 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5445 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5446 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5447 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5448 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5449 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5450 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5451 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5452 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5453 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5454 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5455 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5456 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5457 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5458 case PGMPOOLKIND_32BIT_PD:
5459 return "PGMPOOLKIND_32BIT_PD";
5460 case PGMPOOLKIND_32BIT_PD_PHYS:
5461 return "PGMPOOLKIND_32BIT_PD_PHYS";
5462 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5463 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5464 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5465 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5466 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5467 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5468 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5469 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5470 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5471 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5472 case PGMPOOLKIND_PAE_PD_PHYS:
5473 return "PGMPOOLKIND_PAE_PD_PHYS";
5474 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5475 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5476 case PGMPOOLKIND_PAE_PDPT:
5477 return "PGMPOOLKIND_PAE_PDPT";
5478 case PGMPOOLKIND_PAE_PDPT_PHYS:
5479 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5480 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5481 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5482 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5483 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5484 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5485 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5486 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5487 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5488 case PGMPOOLKIND_64BIT_PML4:
5489 return "PGMPOOLKIND_64BIT_PML4";
5490 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5491 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5492 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5493 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5494 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5495 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5496 case PGMPOOLKIND_ROOT_NESTED:
5497 return "PGMPOOLKIND_ROOT_NESTED";
5498 }
5499 return "Unknown kind!";
5500}
5501#endif /* LOG_ENABLED */
5502