VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 48215

Last change on this file since 48215 was 48184, checked in by vboxsync, 11 years ago

PGMAllPool.cpp: Try work around assertion in pgmPoolMonitorChainChanging caused by cbWrite being larger than 8.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 210.5 KB
Line 
1/* $Id: PGMAllPool.cpp 48184 2013-08-30 09:57:05Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hm_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
51static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#ifndef IN_RING3
53DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
54#endif
55#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
56static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
57#endif
58#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
59static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
60#endif
61
62int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
63PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
64void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
65void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
66
67RT_C_DECLS_END
68
69
70/**
71 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
72 *
73 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
74 * @param enmKind The page kind.
75 */
76DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
77{
78 switch (enmKind)
79 {
80 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
83 return true;
84 default:
85 return false;
86 }
87}
88
89
90/**
91 * Flushes a chain of pages sharing the same access monitor.
92 *
93 * @returns VBox status code suitable for scheduling.
94 * @param pPool The pool.
95 * @param pPage A page in the chain.
96 * @todo VBOXSTRICTRC
97 */
98int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
99{
100 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
101
102 /*
103 * Find the list head.
104 */
105 uint16_t idx = pPage->idx;
106 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
109 {
110 idx = pPage->iMonitoredPrev;
111 Assert(idx != pPage->idx);
112 pPage = &pPool->aPages[idx];
113 }
114 }
115
116 /*
117 * Iterate the list flushing each shadow page.
118 */
119 int rc = VINF_SUCCESS;
120 for (;;)
121 {
122 idx = pPage->iMonitoredNext;
123 Assert(idx != pPage->idx);
124 if (pPage->idx >= PGMPOOL_IDX_FIRST)
125 {
126 int rc2 = pgmPoolFlushPage(pPool, pPage);
127 AssertRC(rc2);
128 }
129 /* next */
130 if (idx == NIL_PGMPOOL_IDX)
131 break;
132 pPage = &pPool->aPages[idx];
133 }
134 return rc;
135}
136
137
138/**
139 * Wrapper for getting the current context pointer to the entry being modified.
140 *
141 * @returns VBox status code suitable for scheduling.
142 * @param pVM Pointer to the VM.
143 * @param pvDst Destination address
144 * @param pvSrc Source guest virtual address.
145 * @param GCPhysSrc The source guest physical address.
146 * @param cb Size of data to read
147 */
148DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc,
149 RTGCPHYS GCPhysSrc, size_t cb)
150{
151#if defined(IN_RING3)
152 NOREF(pVM); NOREF(GCPhysSrc);
153 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
154 return VINF_SUCCESS;
155#else
156 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
157 NOREF(pvSrc);
158 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
159#endif
160}
161
162
163/**
164 * Process shadow entries before they are changed by the guest.
165 *
166 * For PT entries we will clear them. For PD entries, we'll simply check
167 * for mapping conflicts and set the SyncCR3 FF if found.
168 *
169 * @param pVCpu Pointer to the VMCPU.
170 * @param pPool The pool.
171 * @param pPage The head page.
172 * @param GCPhysFault The guest physical fault address.
173 * @param uAddress In R0 and GC this is the guest context fault address (flat).
174 * In R3 this is the host context 'fault' address.
175 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
176 */
177void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
178 CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
179{
180 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
181 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
182 PVM pVM = pPool->CTX_SUFF(pVM);
183 NOREF(pVCpu);
184
185 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
186
187 for (;;)
188 {
189 union
190 {
191 void *pv;
192 PX86PT pPT;
193 PPGMSHWPTPAE pPTPae;
194 PX86PD pPD;
195 PX86PDPAE pPDPae;
196 PX86PDPT pPDPT;
197 PX86PML4 pPML4;
198 } uShw;
199
200 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
201
202 uShw.pv = NULL;
203 switch (pPage->enmKind)
204 {
205 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
206 {
207 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
208 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
209 const unsigned iShw = off / sizeof(X86PTE);
210 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
211 if (uShw.pPT->a[iShw].n.u1Present)
212 {
213 X86PTE GstPte;
214
215 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
216 AssertRC(rc);
217 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
218 pgmPoolTracDerefGCPhysHint(pPool, pPage,
219 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
220 GstPte.u & X86_PTE_PG_MASK,
221 iShw);
222 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
223 }
224 break;
225 }
226
227 /* page/2 sized */
228 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
229 {
230 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
231 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
232 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
233 {
234 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
235 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
236 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
237 {
238 X86PTE GstPte;
239 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
240 AssertRC(rc);
241
242 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
243 pgmPoolTracDerefGCPhysHint(pPool, pPage,
244 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
245 GstPte.u & X86_PTE_PG_MASK,
246 iShw);
247 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
248 }
249 }
250 break;
251 }
252
253 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
254 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
255 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
256 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
257 {
258 unsigned iGst = off / sizeof(X86PDE);
259 unsigned iShwPdpt = iGst / 256;
260 unsigned iShw = (iGst % 256) * 2;
261 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
262
263 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
264 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
265 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
266 {
267 for (unsigned i = 0; i < 2; i++)
268 {
269# ifdef VBOX_WITH_RAW_MODE_NOT_R0
270 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
271 {
272 Assert(pgmMapAreMappingsEnabled(pVM));
273 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
274 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
275 break;
276 }
277# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
278 if (uShw.pPDPae->a[iShw+i].n.u1Present)
279 {
280 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
281 pgmPoolFree(pVM,
282 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
283 pPage->idx,
284 iShw + i);
285 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
286 }
287
288 /* paranoia / a bit assumptive. */
289 if ( (off & 3)
290 && (off & 3) + cbWrite > 4)
291 {
292 const unsigned iShw2 = iShw + 2 + i;
293 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
294 {
295# ifdef VBOX_WITH_RAW_MODE_NOT_R0
296 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
297 {
298 Assert(pgmMapAreMappingsEnabled(pVM));
299 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
300 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
301 break;
302 }
303# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
304 if (uShw.pPDPae->a[iShw2].n.u1Present)
305 {
306 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
307 pgmPoolFree(pVM,
308 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
309 pPage->idx,
310 iShw2);
311 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
312 }
313 }
314 }
315 }
316 }
317 break;
318 }
319
320 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
321 {
322 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
323 const unsigned iShw = off / sizeof(X86PTEPAE);
324 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
325 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
326 {
327 X86PTEPAE GstPte;
328 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
329 AssertRC(rc);
330
331 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
332 pgmPoolTracDerefGCPhysHint(pPool, pPage,
333 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
334 GstPte.u & X86_PTE_PAE_PG_MASK,
335 iShw);
336 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
337 }
338
339 /* paranoia / a bit assumptive. */
340 if ( (off & 7)
341 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
342 {
343 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
344 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
345
346 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
347 {
348 X86PTEPAE GstPte;
349# ifdef IN_RING3
350 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
351# else
352 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
353# endif
354 AssertRC(rc);
355 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
356 pgmPoolTracDerefGCPhysHint(pPool, pPage,
357 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
358 GstPte.u & X86_PTE_PAE_PG_MASK,
359 iShw2);
360 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
361 }
362 }
363 break;
364 }
365
366 case PGMPOOLKIND_32BIT_PD:
367 {
368 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
369 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
370
371 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
372 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
373# ifdef VBOX_WITH_RAW_MODE_NOT_R0
374 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
375 {
376 Assert(pgmMapAreMappingsEnabled(pVM));
377 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
378 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
379 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
380 break;
381 }
382 else
383# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
384 {
385 if (uShw.pPD->a[iShw].n.u1Present)
386 {
387 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
388 pgmPoolFree(pVM,
389 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
390 pPage->idx,
391 iShw);
392 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
393 }
394 }
395 /* paranoia / a bit assumptive. */
396 if ( (off & 3)
397 && (off & 3) + cbWrite > sizeof(X86PTE))
398 {
399 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
400 if ( iShw2 != iShw
401 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
402 {
403# ifdef VBOX_WITH_RAW_MODE_NOT_R0
404 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
405 {
406 Assert(pgmMapAreMappingsEnabled(pVM));
407 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
408 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
409 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
410 break;
411 }
412# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
413 if (uShw.pPD->a[iShw2].n.u1Present)
414 {
415 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
416 pgmPoolFree(pVM,
417 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
418 pPage->idx,
419 iShw2);
420 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
421 }
422 }
423 }
424#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
425 if ( uShw.pPD->a[iShw].n.u1Present
426 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
427 {
428 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
429# ifdef IN_RC /* TLB load - we're pushing things a bit... */
430 ASMProbeReadByte(pvAddress);
431# endif
432 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
433 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
434 }
435#endif
436 break;
437 }
438
439 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
440 {
441 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
442 const unsigned iShw = off / sizeof(X86PDEPAE);
443 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
444#ifdef VBOX_WITH_RAW_MODE_NOT_R0
445 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
446 {
447 Assert(pgmMapAreMappingsEnabled(pVM));
448 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
449 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
450 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
451 break;
452 }
453#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
454 /*
455 * Causes trouble when the guest uses a PDE to refer to the whole page table level
456 * structure. (Invalidate here; faults later on when it tries to change the page
457 * table entries -> recheck; probably only applies to the RC case.)
458 */
459#ifdef VBOX_WITH_RAW_MODE_NOT_R0
460 else
461#endif
462 {
463 if (uShw.pPDPae->a[iShw].n.u1Present)
464 {
465 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
466 pgmPoolFree(pVM,
467 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
468 pPage->idx,
469 iShw);
470 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
471 }
472 }
473 /* paranoia / a bit assumptive. */
474 if ( (off & 7)
475 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
476 {
477 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
478 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
479
480#ifdef VBOX_WITH_RAW_MODE_NOT_R0
481 if ( iShw2 != iShw
482 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
483 {
484 Assert(pgmMapAreMappingsEnabled(pVM));
485 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
486 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
487 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
488 break;
489 }
490 else
491#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
492 if (uShw.pPDPae->a[iShw2].n.u1Present)
493 {
494 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
495 pgmPoolFree(pVM,
496 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
497 pPage->idx,
498 iShw2);
499 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
500 }
501 }
502 break;
503 }
504
505 case PGMPOOLKIND_PAE_PDPT:
506 {
507 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
508 /*
509 * Hopefully this doesn't happen very often:
510 * - touching unused parts of the page
511 * - messing with the bits of pd pointers without changing the physical address
512 */
513 /* PDPT roots are not page aligned; 32 byte only! */
514 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
515
516 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
517 const unsigned iShw = offPdpt / sizeof(X86PDPE);
518 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
519 {
520# ifdef VBOX_WITH_RAW_MODE_NOT_R0
521 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
522 {
523 Assert(pgmMapAreMappingsEnabled(pVM));
524 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
525 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
526 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
527 break;
528 }
529 else
530# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
531 if (uShw.pPDPT->a[iShw].n.u1Present)
532 {
533 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
534 pgmPoolFree(pVM,
535 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
536 pPage->idx,
537 iShw);
538 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
539 }
540
541 /* paranoia / a bit assumptive. */
542 if ( (offPdpt & 7)
543 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
544 {
545 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
546 if ( iShw2 != iShw
547 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
548 {
549# ifdef VBOX_WITH_RAW_MODE_NOT_R0
550 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
551 {
552 Assert(pgmMapAreMappingsEnabled(pVM));
553 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
554 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
555 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
556 break;
557 }
558 else
559# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
560 if (uShw.pPDPT->a[iShw2].n.u1Present)
561 {
562 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
563 pgmPoolFree(pVM,
564 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
565 pPage->idx,
566 iShw2);
567 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
568 }
569 }
570 }
571 }
572 break;
573 }
574
575#ifndef IN_RC
576 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
577 {
578 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
579 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
580 const unsigned iShw = off / sizeof(X86PDEPAE);
581 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
582 if (uShw.pPDPae->a[iShw].n.u1Present)
583 {
584 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
585 pgmPoolFree(pVM,
586 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
587 pPage->idx,
588 iShw);
589 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
590 }
591 /* paranoia / a bit assumptive. */
592 if ( (off & 7)
593 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
594 {
595 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
596 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
597
598 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
599 if (uShw.pPDPae->a[iShw2].n.u1Present)
600 {
601 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
602 pgmPoolFree(pVM,
603 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
604 pPage->idx,
605 iShw2);
606 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
607 }
608 }
609 break;
610 }
611
612 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
613 {
614 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
615 /*
616 * Hopefully this doesn't happen very often:
617 * - messing with the bits of pd pointers without changing the physical address
618 */
619 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
620 const unsigned iShw = off / sizeof(X86PDPE);
621 if (uShw.pPDPT->a[iShw].n.u1Present)
622 {
623 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
624 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
625 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
626 }
627 /* paranoia / a bit assumptive. */
628 if ( (off & 7)
629 && (off & 7) + cbWrite > sizeof(X86PDPE))
630 {
631 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
632 if (uShw.pPDPT->a[iShw2].n.u1Present)
633 {
634 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
635 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
636 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
637 }
638 }
639 break;
640 }
641
642 case PGMPOOLKIND_64BIT_PML4:
643 {
644 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
645 /*
646 * Hopefully this doesn't happen very often:
647 * - messing with the bits of pd pointers without changing the physical address
648 */
649 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
650 const unsigned iShw = off / sizeof(X86PDPE);
651 if (uShw.pPML4->a[iShw].n.u1Present)
652 {
653 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
654 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
655 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
656 }
657 /* paranoia / a bit assumptive. */
658 if ( (off & 7)
659 && (off & 7) + cbWrite > sizeof(X86PDPE))
660 {
661 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
662 if (uShw.pPML4->a[iShw2].n.u1Present)
663 {
664 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
665 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
666 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
667 }
668 }
669 break;
670 }
671#endif /* IN_RING0 */
672
673 default:
674 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
675 }
676 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
677
678 /* next */
679 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
680 return;
681 pPage = &pPool->aPages[pPage->iMonitoredNext];
682 }
683}
684
685# ifndef IN_RING3
686
687/**
688 * Checks if a access could be a fork operation in progress.
689 *
690 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
691 *
692 * @returns true if it's likely that we're forking, otherwise false.
693 * @param pPool The pool.
694 * @param pDis The disassembled instruction.
695 * @param offFault The access offset.
696 */
697DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
698{
699 /*
700 * i386 linux is using btr to clear X86_PTE_RW.
701 * The functions involved are (2.6.16 source inspection):
702 * clear_bit
703 * ptep_set_wrprotect
704 * copy_one_pte
705 * copy_pte_range
706 * copy_pmd_range
707 * copy_pud_range
708 * copy_page_range
709 * dup_mmap
710 * dup_mm
711 * copy_mm
712 * copy_process
713 * do_fork
714 */
715 if ( pDis->pCurInstr->uOpcode == OP_BTR
716 && !(offFault & 4)
717 /** @todo Validate that the bit index is X86_PTE_RW. */
718 )
719 {
720 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
721 return true;
722 }
723 return false;
724}
725
726
727/**
728 * Determine whether the page is likely to have been reused.
729 *
730 * @returns true if we consider the page as being reused for a different purpose.
731 * @returns false if we consider it to still be a paging page.
732 * @param pVM Pointer to the VM.
733 * @param pVCpu Pointer to the VMCPU.
734 * @param pRegFrame Trap register frame.
735 * @param pDis The disassembly info for the faulting instruction.
736 * @param pvFault The fault address.
737 *
738 * @remark The REP prefix check is left to the caller because of STOSD/W.
739 */
740DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
741{
742#ifndef IN_RC
743 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
744 if ( HMHasPendingIrq(pVM)
745 && (pRegFrame->rsp - pvFault) < 32)
746 {
747 /* Fault caused by stack writes while trying to inject an interrupt event. */
748 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
749 return true;
750 }
751#else
752 NOREF(pVM); NOREF(pvFault);
753#endif
754
755 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
756
757 /* Non-supervisor mode write means it's used for something else. */
758 if (CPUMGetGuestCPL(pVCpu) == 3)
759 return true;
760
761 switch (pDis->pCurInstr->uOpcode)
762 {
763 /* call implies the actual push of the return address faulted */
764 case OP_CALL:
765 Log4(("pgmPoolMonitorIsReused: CALL\n"));
766 return true;
767 case OP_PUSH:
768 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
769 return true;
770 case OP_PUSHF:
771 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
772 return true;
773 case OP_PUSHA:
774 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
775 return true;
776 case OP_FXSAVE:
777 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
778 return true;
779 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
780 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
781 return true;
782 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
783 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
784 return true;
785 case OP_MOVSWD:
786 case OP_STOSWD:
787 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
788 && pRegFrame->rcx >= 0x40
789 )
790 {
791 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
792
793 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
794 return true;
795 }
796 return false;
797 }
798 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
799 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
800 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
801 {
802 Log4(("pgmPoolMonitorIsReused: ESP\n"));
803 return true;
804 }
805
806 return false;
807}
808
809
810/**
811 * Flushes the page being accessed.
812 *
813 * @returns VBox status code suitable for scheduling.
814 * @param pVM Pointer to the VM.
815 * @param pVCpu Pointer to the VMCPU.
816 * @param pPool The pool.
817 * @param pPage The pool page (head).
818 * @param pDis The disassembly of the write instruction.
819 * @param pRegFrame The trap register frame.
820 * @param GCPhysFault The fault address as guest physical address.
821 * @param pvFault The fault address.
822 * @todo VBOXSTRICTRC
823 */
824static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
825 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
826{
827 NOREF(GCPhysFault);
828
829 /*
830 * First, do the flushing.
831 */
832 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
833
834 /*
835 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
836 * Must do this in raw mode (!); XP boot will fail otherwise.
837 */
838 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
839 if (rc2 == VINF_SUCCESS)
840 { /* do nothing */ }
841#ifdef VBOX_WITH_IEM
842 else if (rc2 == VINF_EM_RESCHEDULE)
843 {
844 if (rc == VINF_SUCCESS)
845 rc = VBOXSTRICTRC_VAL(rc2);
846# ifndef IN_RING3
847 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
848# endif
849 }
850#endif
851 else if (rc2 == VERR_EM_INTERPRETER)
852 {
853#ifdef IN_RC
854 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
855 {
856 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
857 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->eip));
858 rc = VINF_SUCCESS;
859 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
860 }
861 else
862#endif
863 {
864 rc = VINF_EM_RAW_EMULATE_INSTR;
865 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
866 }
867 }
868 else if (RT_FAILURE_NP(rc2))
869 rc = VBOXSTRICTRC_VAL(rc2);
870 else
871 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
872
873 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
874 return rc;
875}
876
877
878/**
879 * Handles the STOSD write accesses.
880 *
881 * @returns VBox status code suitable for scheduling.
882 * @param pVM Pointer to the VM.
883 * @param pPool The pool.
884 * @param pPage The pool page (head).
885 * @param pDis The disassembly of the write instruction.
886 * @param pRegFrame The trap register frame.
887 * @param GCPhysFault The fault address as guest physical address.
888 * @param pvFault The fault address.
889 */
890DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
891 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
892{
893 unsigned uIncrement = pDis->Param1.cb;
894 NOREF(pVM);
895
896 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
897 Assert(pRegFrame->rcx <= 0x20);
898
899#ifdef VBOX_STRICT
900 if (pDis->uOpMode == DISCPUMODE_32BIT)
901 Assert(uIncrement == 4);
902 else
903 Assert(uIncrement == 8);
904#endif
905
906 Log3(("pgmPoolAccessHandlerSTOSD\n"));
907
908 /*
909 * Increment the modification counter and insert it into the list
910 * of modified pages the first time.
911 */
912 if (!pPage->cModifications++)
913 pgmPoolMonitorModifiedInsert(pPool, pPage);
914
915 /*
916 * Execute REP STOSD.
917 *
918 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
919 * write situation, meaning that it's safe to write here.
920 */
921 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
922 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
923 while (pRegFrame->rcx)
924 {
925#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
926 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
927 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
928 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
929#else
930 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
931#endif
932#ifdef IN_RC
933 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
934#else
935 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
936#endif
937 pu32 += uIncrement;
938 GCPhysFault += uIncrement;
939 pRegFrame->rdi += uIncrement;
940 pRegFrame->rcx--;
941 }
942 pRegFrame->rip += pDis->cbInstr;
943
944 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
945 return VINF_SUCCESS;
946}
947
948
949/**
950 * Handles the simple write accesses.
951 *
952 * @returns VBox status code suitable for scheduling.
953 * @param pVM Pointer to the VM.
954 * @param pVCpu Pointer to the VMCPU.
955 * @param pPool The pool.
956 * @param pPage The pool page (head).
957 * @param pDis The disassembly of the write instruction.
958 * @param pRegFrame The trap register frame.
959 * @param GCPhysFault The fault address as guest physical address.
960 * @param pvFault The fault address.
961 * @param pfReused Reused state (in/out)
962 */
963DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
964 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
965{
966 Log3(("pgmPoolAccessHandlerSimple\n"));
967 NOREF(pfReused); /* initialized by caller */
968
969 /*
970 * Increment the modification counter and insert it into the list
971 * of modified pages the first time.
972 */
973 if (!pPage->cModifications++)
974 pgmPoolMonitorModifiedInsert(pPool, pPage);
975
976 /*
977 * Clear all the pages. ASSUMES that pvFault is readable.
978 */
979#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
980 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
981#endif
982
983 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
984 if (cbWrite <= 8)
985 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, cbWrite);
986 else
987 {
988 Assert(cbWrite <= 16);
989 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, 8);
990 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, pvFault + 8, cbWrite - 8);
991 }
992
993#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
994 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
995#endif
996
997 /*
998 * Interpret the instruction.
999 */
1000 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
1001 if (RT_SUCCESS(rc))
1002 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1003 else if (rc == VERR_EM_INTERPRETER)
1004 {
1005 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1006 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1007 rc = VINF_EM_RAW_EMULATE_INSTR;
1008 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1009 }
1010
1011#if 0 /* experimental code */
1012 if (rc == VINF_SUCCESS)
1013 {
1014 switch (pPage->enmKind)
1015 {
1016 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1017 {
1018 X86PTEPAE GstPte;
1019 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1020 AssertRC(rc);
1021
1022 /* Check the new value written by the guest. If present and with a bogus physical address, then
1023 * it's fairly safe to assume the guest is reusing the PT.
1024 */
1025 if (GstPte.n.u1Present)
1026 {
1027 RTHCPHYS HCPhys = -1;
1028 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1029 if (rc != VINF_SUCCESS)
1030 {
1031 *pfReused = true;
1032 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1033 }
1034 }
1035 break;
1036 }
1037 }
1038 }
1039#endif
1040
1041 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1042 return VBOXSTRICTRC_VAL(rc);
1043}
1044
1045
1046/**
1047 * \#PF Handler callback for PT write accesses.
1048 *
1049 * @returns VBox status code (appropriate for GC return).
1050 * @param pVM Pointer to the VM.
1051 * @param uErrorCode CPU Error code.
1052 * @param pRegFrame Trap register frame.
1053 * NULL on DMA and other non CPU access.
1054 * @param pvFault The fault address (cr2).
1055 * @param GCPhysFault The GC physical address corresponding to pvFault.
1056 * @param pvUser User argument.
1057 */
1058DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault,
1059 RTGCPHYS GCPhysFault, void *pvUser)
1060{
1061 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1062 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1063 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1064 PVMCPU pVCpu = VMMGetCpu(pVM);
1065 unsigned cMaxModifications;
1066 bool fForcedFlush = false;
1067 NOREF(uErrorCode);
1068
1069 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1070
1071 pgmLock(pVM);
1072 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1073 {
1074 /* Pool page changed while we were waiting for the lock; ignore. */
1075 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1076 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1077 pgmUnlock(pVM);
1078 return VINF_SUCCESS;
1079 }
1080#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1081 if (pPage->fDirty)
1082 {
1083 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1084 pgmUnlock(pVM);
1085 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1086 }
1087#endif
1088
1089#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1090 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1091 {
1092 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1093 void *pvGst;
1094 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1095 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1096 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1097 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1098 }
1099#endif
1100
1101 /*
1102 * Disassemble the faulting instruction.
1103 */
1104 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1105 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1106 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1107 {
1108 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1109 pgmUnlock(pVM);
1110 return rc;
1111 }
1112
1113 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1114
1115 /*
1116 * We should ALWAYS have the list head as user parameter. This
1117 * is because we use that page to record the changes.
1118 */
1119 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1120
1121#ifdef IN_RING0
1122 /* Maximum nr of modifications depends on the page type. */
1123 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1124 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1125 cMaxModifications = 4;
1126 else
1127 cMaxModifications = 24;
1128#else
1129 cMaxModifications = 48;
1130#endif
1131
1132 /*
1133 * Incremental page table updates should weigh more than random ones.
1134 * (Only applies when started from offset 0)
1135 */
1136 pVCpu->pgm.s.cPoolAccessHandler++;
1137 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1138 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1139 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1140 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1141 {
1142 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1143 Assert(pPage->cModifications < 32000);
1144 pPage->cModifications = pPage->cModifications * 2;
1145 pPage->GCPtrLastAccessHandlerFault = pvFault;
1146 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1147 if (pPage->cModifications >= cMaxModifications)
1148 {
1149 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1150 fForcedFlush = true;
1151 }
1152 }
1153
1154 if (pPage->cModifications >= cMaxModifications)
1155 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1156
1157 /*
1158 * Check if it's worth dealing with.
1159 */
1160 bool fReused = false;
1161 bool fNotReusedNotForking = false;
1162 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1163 || pgmPoolIsPageLocked(pPage)
1164 )
1165 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1166 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1167 {
1168 /*
1169 * Simple instructions, no REP prefix.
1170 */
1171 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1172 {
1173 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1174 if (fReused)
1175 goto flushPage;
1176
1177 /* A mov instruction to change the first page table entry will be remembered so we can detect
1178 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1179 */
1180 if ( rc == VINF_SUCCESS
1181 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1182 && pDis->pCurInstr->uOpcode == OP_MOV
1183 && (pvFault & PAGE_OFFSET_MASK) == 0)
1184 {
1185 pPage->GCPtrLastAccessHandlerFault = pvFault;
1186 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1187 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1188 /* Make sure we don't kick out a page too quickly. */
1189 if (pPage->cModifications > 8)
1190 pPage->cModifications = 2;
1191 }
1192 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1193 {
1194 /* ignore the 2nd write to this page table entry. */
1195 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1196 }
1197 else
1198 {
1199 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1200 pPage->GCPtrLastAccessHandlerRip = 0;
1201 }
1202
1203 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1204 pgmUnlock(pVM);
1205 return rc;
1206 }
1207
1208 /*
1209 * Windows is frequently doing small memset() operations (netio test 4k+).
1210 * We have to deal with these or we'll kill the cache and performance.
1211 */
1212 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1213 && !pRegFrame->eflags.Bits.u1DF
1214 && pDis->uOpMode == pDis->uCpuMode
1215 && pDis->uAddrMode == pDis->uCpuMode)
1216 {
1217 bool fValidStosd = false;
1218
1219 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1220 && pDis->fPrefix == DISPREFIX_REP
1221 && pRegFrame->ecx <= 0x20
1222 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1223 && !((uintptr_t)pvFault & 3)
1224 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1225 )
1226 {
1227 fValidStosd = true;
1228 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1229 }
1230 else
1231 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1232 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1233 && pRegFrame->rcx <= 0x20
1234 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1235 && !((uintptr_t)pvFault & 7)
1236 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1237 )
1238 {
1239 fValidStosd = true;
1240 }
1241
1242 if (fValidStosd)
1243 {
1244 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1245 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1246 pgmUnlock(pVM);
1247 return rc;
1248 }
1249 }
1250
1251 /* REP prefix, don't bother. */
1252 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1253 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1254 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1255 fNotReusedNotForking = true;
1256 }
1257
1258#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1259 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1260 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1261 */
1262 if ( pPage->cModifications >= cMaxModifications
1263 && !fForcedFlush
1264 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1265 && ( fNotReusedNotForking
1266 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1267 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1268 )
1269 )
1270 {
1271 Assert(!pgmPoolIsPageLocked(pPage));
1272 Assert(pPage->fDirty == false);
1273
1274 /* Flush any monitored duplicates as we will disable write protection. */
1275 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1276 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1277 {
1278 PPGMPOOLPAGE pPageHead = pPage;
1279
1280 /* Find the monitor head. */
1281 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1282 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1283
1284 while (pPageHead)
1285 {
1286 unsigned idxNext = pPageHead->iMonitoredNext;
1287
1288 if (pPageHead != pPage)
1289 {
1290 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1291 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1292 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1293 AssertRC(rc2);
1294 }
1295
1296 if (idxNext == NIL_PGMPOOL_IDX)
1297 break;
1298
1299 pPageHead = &pPool->aPages[idxNext];
1300 }
1301 }
1302
1303 /* The flushing above might fail for locked pages, so double check. */
1304 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1305 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1306 {
1307 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1308
1309 /* Temporarily allow write access to the page table again. */
1310 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1311 if (rc == VINF_SUCCESS)
1312 {
1313 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1314 AssertMsg(rc == VINF_SUCCESS
1315 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1316 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1317 || rc == VERR_PAGE_NOT_PRESENT,
1318 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1319# ifdef VBOX_STRICT
1320 pPage->GCPtrDirtyFault = pvFault;
1321# endif
1322
1323 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1324 pgmUnlock(pVM);
1325 return rc;
1326 }
1327 }
1328 }
1329#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1330
1331 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1332flushPage:
1333 /*
1334 * Not worth it, so flush it.
1335 *
1336 * If we considered it to be reused, don't go back to ring-3
1337 * to emulate failed instructions since we usually cannot
1338 * interpret then. This may be a bit risky, in which case
1339 * the reuse detection must be fixed.
1340 */
1341 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1342 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1343 && fReused)
1344 {
1345 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1346 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1347 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1348 }
1349 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1350 pgmUnlock(pVM);
1351 return rc;
1352}
1353
1354# endif /* !IN_RING3 */
1355
1356# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1357
1358# if defined(VBOX_STRICT) && !defined(IN_RING3)
1359
1360/**
1361 * Check references to guest physical memory in a PAE / PAE page table.
1362 *
1363 * @param pPool The pool.
1364 * @param pPage The page.
1365 * @param pShwPT The shadow page table (mapping of the page).
1366 * @param pGstPT The guest page table.
1367 */
1368static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1369{
1370 unsigned cErrors = 0;
1371 int LastRc = -1; /* initialized to shut up gcc */
1372 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1373 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1374 PVM pVM = pPool->CTX_SUFF(pVM);
1375
1376#ifdef VBOX_STRICT
1377 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1378 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1379#endif
1380 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1381 {
1382 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1383 {
1384 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1385 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1386 if ( rc != VINF_SUCCESS
1387 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1388 {
1389 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1390 LastPTE = i;
1391 LastRc = rc;
1392 LastHCPhys = HCPhys;
1393 cErrors++;
1394
1395 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1396 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1397 AssertRC(rc);
1398
1399 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1400 {
1401 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1402
1403 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1404 {
1405 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1406
1407 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1408 {
1409 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1410 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1411 {
1412 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1413 }
1414 }
1415
1416 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1417 }
1418 }
1419 }
1420 }
1421 }
1422 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1423}
1424
1425
1426/**
1427 * Check references to guest physical memory in a PAE / 32-bit page table.
1428 *
1429 * @param pPool The pool.
1430 * @param pPage The page.
1431 * @param pShwPT The shadow page table (mapping of the page).
1432 * @param pGstPT The guest page table.
1433 */
1434static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1435{
1436 unsigned cErrors = 0;
1437 int LastRc = -1; /* initialized to shut up gcc */
1438 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1439 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1440 PVM pVM = pPool->CTX_SUFF(pVM);
1441
1442#ifdef VBOX_STRICT
1443 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1444 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1445#endif
1446 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1447 {
1448 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1449 {
1450 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1451 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1452 if ( rc != VINF_SUCCESS
1453 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1454 {
1455 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1456 LastPTE = i;
1457 LastRc = rc;
1458 LastHCPhys = HCPhys;
1459 cErrors++;
1460
1461 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1462 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1463 AssertRC(rc);
1464
1465 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1466 {
1467 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1468
1469 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1470 {
1471 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1472
1473 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1474 {
1475 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1476 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1477 {
1478 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1479 }
1480 }
1481
1482 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1483 }
1484 }
1485 }
1486 }
1487 }
1488 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1489}
1490
1491# endif /* VBOX_STRICT && !IN_RING3 */
1492
1493/**
1494 * Clear references to guest physical memory in a PAE / PAE page table.
1495 *
1496 * @returns nr of changed PTEs
1497 * @param pPool The pool.
1498 * @param pPage The page.
1499 * @param pShwPT The shadow page table (mapping of the page).
1500 * @param pGstPT The guest page table.
1501 * @param pOldGstPT The old cached guest page table.
1502 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1503 * @param pfFlush Flush reused page table (out)
1504 */
1505DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1506 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1507{
1508 unsigned cChanged = 0;
1509
1510#ifdef VBOX_STRICT
1511 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1512 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1513#endif
1514 *pfFlush = false;
1515
1516 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1517 {
1518 /* Check the new value written by the guest. If present and with a bogus physical address, then
1519 * it's fairly safe to assume the guest is reusing the PT.
1520 */
1521 if ( fAllowRemoval
1522 && pGstPT->a[i].n.u1Present)
1523 {
1524 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1525 {
1526 *pfFlush = true;
1527 return ++cChanged;
1528 }
1529 }
1530 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1531 {
1532 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1533 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1534 {
1535#ifdef VBOX_STRICT
1536 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1537 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1538 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1539#endif
1540 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1541 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1542 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1543 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1544
1545 if ( uHostAttr == uGuestAttr
1546 && fHostRW <= fGuestRW)
1547 continue;
1548 }
1549 cChanged++;
1550 /* Something was changed, so flush it. */
1551 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1552 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1553 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1554 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1555 }
1556 }
1557 return cChanged;
1558}
1559
1560
1561/**
1562 * Clear references to guest physical memory in a PAE / PAE page table.
1563 *
1564 * @returns nr of changed PTEs
1565 * @param pPool The pool.
1566 * @param pPage The page.
1567 * @param pShwPT The shadow page table (mapping of the page).
1568 * @param pGstPT The guest page table.
1569 * @param pOldGstPT The old cached guest page table.
1570 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1571 * @param pfFlush Flush reused page table (out)
1572 */
1573DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1574 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1575{
1576 unsigned cChanged = 0;
1577
1578#ifdef VBOX_STRICT
1579 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1580 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1581#endif
1582 *pfFlush = false;
1583
1584 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1585 {
1586 /* Check the new value written by the guest. If present and with a bogus physical address, then
1587 * it's fairly safe to assume the guest is reusing the PT.
1588 */
1589 if ( fAllowRemoval
1590 && pGstPT->a[i].n.u1Present)
1591 {
1592 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1593 {
1594 *pfFlush = true;
1595 return ++cChanged;
1596 }
1597 }
1598 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1599 {
1600 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1601 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1602 {
1603#ifdef VBOX_STRICT
1604 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1605 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1606 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1607#endif
1608 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1609 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1610 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1611 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1612
1613 if ( uHostAttr == uGuestAttr
1614 && fHostRW <= fGuestRW)
1615 continue;
1616 }
1617 cChanged++;
1618 /* Something was changed, so flush it. */
1619 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1620 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1621 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1622 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1623 }
1624 }
1625 return cChanged;
1626}
1627
1628
1629/**
1630 * Flush a dirty page
1631 *
1632 * @param pVM Pointer to the VM.
1633 * @param pPool The pool.
1634 * @param idxSlot Dirty array slot index
1635 * @param fAllowRemoval Allow a reused page table to be removed
1636 */
1637static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1638{
1639 PPGMPOOLPAGE pPage;
1640 unsigned idxPage;
1641
1642 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1643 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1644 return;
1645
1646 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1647 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1648 pPage = &pPool->aPages[idxPage];
1649 Assert(pPage->idx == idxPage);
1650 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1651
1652 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1653 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1654
1655#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1656 PVMCPU pVCpu = VMMGetCpu(pVM);
1657 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1658#endif
1659
1660 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1661 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1662 Assert(rc == VINF_SUCCESS);
1663 pPage->fDirty = false;
1664
1665#ifdef VBOX_STRICT
1666 uint64_t fFlags = 0;
1667 RTHCPHYS HCPhys;
1668 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1669 AssertMsg( ( rc == VINF_SUCCESS
1670 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1671 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1672 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1673 || rc == VERR_PAGE_NOT_PRESENT,
1674 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1675#endif
1676
1677 /* Flush those PTEs that have changed. */
1678 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1679 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1680 void *pvGst;
1681 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1682 bool fFlush;
1683 unsigned cChanges;
1684
1685 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1686 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1687 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1688 else
1689 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1690 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1691
1692 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1693 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1694 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1695 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1696
1697 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1698 Assert(pPage->cModifications);
1699 if (cChanges < 4)
1700 pPage->cModifications = 1; /* must use > 0 here */
1701 else
1702 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1703
1704 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1705 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1706 pPool->idxFreeDirtyPage = idxSlot;
1707
1708 pPool->cDirtyPages--;
1709 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1710 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1711 if (fFlush)
1712 {
1713 Assert(fAllowRemoval);
1714 Log(("Flush reused page table!\n"));
1715 pgmPoolFlushPage(pPool, pPage);
1716 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1717 }
1718 else
1719 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1720
1721#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1722 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1723#endif
1724}
1725
1726
1727# ifndef IN_RING3
1728/**
1729 * Add a new dirty page
1730 *
1731 * @param pVM Pointer to the VM.
1732 * @param pPool The pool.
1733 * @param pPage The page.
1734 */
1735void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1736{
1737 unsigned idxFree;
1738
1739 PGM_LOCK_ASSERT_OWNER(pVM);
1740 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1741 Assert(!pPage->fDirty);
1742
1743 idxFree = pPool->idxFreeDirtyPage;
1744 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1745 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1746
1747 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1748 {
1749 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1750 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1751 }
1752 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1753 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1754
1755 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1756
1757 /*
1758 * Make a copy of the guest page table as we require valid GCPhys addresses
1759 * when removing references to physical pages.
1760 * (The HCPhys linear lookup is *extremely* expensive!)
1761 */
1762 void *pvGst;
1763 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1764 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1765# ifdef VBOX_STRICT
1766 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1767 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1768 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1769 else
1770 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1771 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1772# endif
1773 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1774
1775 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1776 pPage->fDirty = true;
1777 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1778 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1779 pPool->cDirtyPages++;
1780
1781 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1782 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1783 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1784 {
1785 unsigned i;
1786 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1787 {
1788 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1789 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1790 {
1791 pPool->idxFreeDirtyPage = idxFree;
1792 break;
1793 }
1794 }
1795 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1796 }
1797
1798 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1799 return;
1800}
1801# endif /* !IN_RING3 */
1802
1803
1804/**
1805 * Check if the specified page is dirty (not write monitored)
1806 *
1807 * @return dirty or not
1808 * @param pVM Pointer to the VM.
1809 * @param GCPhys Guest physical address
1810 */
1811bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1812{
1813 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1814 PGM_LOCK_ASSERT_OWNER(pVM);
1815 if (!pPool->cDirtyPages)
1816 return false;
1817
1818 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1819
1820 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1821 {
1822 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1823 {
1824 PPGMPOOLPAGE pPage;
1825 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1826
1827 pPage = &pPool->aPages[idxPage];
1828 if (pPage->GCPhys == GCPhys)
1829 return true;
1830 }
1831 }
1832 return false;
1833}
1834
1835
1836/**
1837 * Reset all dirty pages by reinstating page monitoring.
1838 *
1839 * @param pVM Pointer to the VM.
1840 */
1841void pgmPoolResetDirtyPages(PVM pVM)
1842{
1843 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1844 PGM_LOCK_ASSERT_OWNER(pVM);
1845 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1846
1847 if (!pPool->cDirtyPages)
1848 return;
1849
1850 Log(("pgmPoolResetDirtyPages\n"));
1851 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1852 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1853
1854 pPool->idxFreeDirtyPage = 0;
1855 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1856 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1857 {
1858 unsigned i;
1859 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1860 {
1861 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1862 {
1863 pPool->idxFreeDirtyPage = i;
1864 break;
1865 }
1866 }
1867 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1868 }
1869
1870 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1871 return;
1872}
1873
1874
1875/**
1876 * Invalidate the PT entry for the specified page
1877 *
1878 * @param pVM Pointer to the VM.
1879 * @param GCPtrPage Guest page to invalidate
1880 */
1881void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1882{
1883 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1884 PGM_LOCK_ASSERT_OWNER(pVM);
1885 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1886
1887 if (!pPool->cDirtyPages)
1888 return;
1889
1890 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
1891 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1892 {
1893 }
1894}
1895
1896
1897/**
1898 * Reset all dirty pages by reinstating page monitoring.
1899 *
1900 * @param pVM Pointer to the VM.
1901 * @param GCPhysPT Physical address of the page table
1902 */
1903void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1904{
1905 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1906 PGM_LOCK_ASSERT_OWNER(pVM);
1907 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1908 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1909
1910 if (!pPool->cDirtyPages)
1911 return;
1912
1913 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1914
1915 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1916 {
1917 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1918 {
1919 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1920
1921 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1922 if (pPage->GCPhys == GCPhysPT)
1923 {
1924 idxDirtyPage = i;
1925 break;
1926 }
1927 }
1928 }
1929
1930 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1931 {
1932 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1933 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1934 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1935 {
1936 unsigned i;
1937 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1938 {
1939 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1940 {
1941 pPool->idxFreeDirtyPage = i;
1942 break;
1943 }
1944 }
1945 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1946 }
1947 }
1948}
1949
1950# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1951
1952/**
1953 * Inserts a page into the GCPhys hash table.
1954 *
1955 * @param pPool The pool.
1956 * @param pPage The page.
1957 */
1958DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1959{
1960 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1961 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1962 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1963 pPage->iNext = pPool->aiHash[iHash];
1964 pPool->aiHash[iHash] = pPage->idx;
1965}
1966
1967
1968/**
1969 * Removes a page from the GCPhys hash table.
1970 *
1971 * @param pPool The pool.
1972 * @param pPage The page.
1973 */
1974DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1975{
1976 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1977 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1978 if (pPool->aiHash[iHash] == pPage->idx)
1979 pPool->aiHash[iHash] = pPage->iNext;
1980 else
1981 {
1982 uint16_t iPrev = pPool->aiHash[iHash];
1983 for (;;)
1984 {
1985 const int16_t i = pPool->aPages[iPrev].iNext;
1986 if (i == pPage->idx)
1987 {
1988 pPool->aPages[iPrev].iNext = pPage->iNext;
1989 break;
1990 }
1991 if (i == NIL_PGMPOOL_IDX)
1992 {
1993 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1994 break;
1995 }
1996 iPrev = i;
1997 }
1998 }
1999 pPage->iNext = NIL_PGMPOOL_IDX;
2000}
2001
2002
2003/**
2004 * Frees up one cache page.
2005 *
2006 * @returns VBox status code.
2007 * @retval VINF_SUCCESS on success.
2008 * @param pPool The pool.
2009 * @param iUser The user index.
2010 */
2011static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2012{
2013#ifndef IN_RC
2014 const PVM pVM = pPool->CTX_SUFF(pVM);
2015#endif
2016 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
2017 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2018
2019 /*
2020 * Select one page from the tail of the age list.
2021 */
2022 PPGMPOOLPAGE pPage;
2023 for (unsigned iLoop = 0; ; iLoop++)
2024 {
2025 uint16_t iToFree = pPool->iAgeTail;
2026 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2027 iToFree = pPool->aPages[iToFree].iAgePrev;
2028/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2029 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2030 {
2031 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2032 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2033 {
2034 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2035 continue;
2036 iToFree = i;
2037 break;
2038 }
2039 }
2040*/
2041 Assert(iToFree != iUser);
2042 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2043 pPage = &pPool->aPages[iToFree];
2044
2045 /*
2046 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2047 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2048 */
2049 if ( !pgmPoolIsPageLocked(pPage)
2050 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2051 break;
2052 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2053 pgmPoolCacheUsed(pPool, pPage);
2054 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2055 }
2056
2057 /*
2058 * Found a usable page, flush it and return.
2059 */
2060 int rc = pgmPoolFlushPage(pPool, pPage);
2061 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2062 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2063 if (rc == VINF_SUCCESS)
2064 PGM_INVL_ALL_VCPU_TLBS(pVM);
2065 return rc;
2066}
2067
2068
2069/**
2070 * Checks if a kind mismatch is really a page being reused
2071 * or if it's just normal remappings.
2072 *
2073 * @returns true if reused and the cached page (enmKind1) should be flushed
2074 * @returns false if not reused.
2075 * @param enmKind1 The kind of the cached page.
2076 * @param enmKind2 The kind of the requested page.
2077 */
2078static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2079{
2080 switch (enmKind1)
2081 {
2082 /*
2083 * Never reuse them. There is no remapping in non-paging mode.
2084 */
2085 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2086 case PGMPOOLKIND_32BIT_PD_PHYS:
2087 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2088 case PGMPOOLKIND_PAE_PD_PHYS:
2089 case PGMPOOLKIND_PAE_PDPT_PHYS:
2090 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2091 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2092 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2093 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2094 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2095 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2096 return false;
2097
2098 /*
2099 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2100 */
2101 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2102 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2103 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2104 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2105 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2106 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2107 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2108 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2109 case PGMPOOLKIND_32BIT_PD:
2110 case PGMPOOLKIND_PAE_PDPT:
2111 switch (enmKind2)
2112 {
2113 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2114 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2115 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2116 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2117 case PGMPOOLKIND_64BIT_PML4:
2118 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2119 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2120 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2121 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2122 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2123 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2124 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2125 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2126 return true;
2127 default:
2128 return false;
2129 }
2130
2131 /*
2132 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2133 */
2134 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2135 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2136 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2137 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2138 case PGMPOOLKIND_64BIT_PML4:
2139 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2140 switch (enmKind2)
2141 {
2142 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2143 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2144 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2145 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2146 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2147 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2148 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2149 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2150 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2151 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2152 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2153 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2154 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2155 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2156 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2157 return true;
2158 default:
2159 return false;
2160 }
2161
2162 /*
2163 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2164 */
2165 case PGMPOOLKIND_ROOT_NESTED:
2166 return false;
2167
2168 default:
2169 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2170 }
2171}
2172
2173
2174/**
2175 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2176 *
2177 * @returns VBox status code.
2178 * @retval VINF_PGM_CACHED_PAGE on success.
2179 * @retval VERR_FILE_NOT_FOUND if not found.
2180 * @param pPool The pool.
2181 * @param GCPhys The GC physical address of the page we're gonna shadow.
2182 * @param enmKind The kind of mapping.
2183 * @param enmAccess Access type for the mapping (only relevant for big pages)
2184 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2185 * @param iUser The shadow page pool index of the user table. This is
2186 * NIL_PGMPOOL_IDX for root pages.
2187 * @param iUserTable The index into the user table (shadowed). Ignored if
2188 * root page
2189 * @param ppPage Where to store the pointer to the page.
2190 */
2191static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2192 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2193{
2194 /*
2195 * Look up the GCPhys in the hash.
2196 */
2197 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2198 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2199 if (i != NIL_PGMPOOL_IDX)
2200 {
2201 do
2202 {
2203 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2204 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2205 if (pPage->GCPhys == GCPhys)
2206 {
2207 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2208 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2209 && pPage->fA20Enabled == fA20Enabled)
2210 {
2211 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2212 * doesn't flush it in case there are no more free use records.
2213 */
2214 pgmPoolCacheUsed(pPool, pPage);
2215
2216 int rc = VINF_SUCCESS;
2217 if (iUser != NIL_PGMPOOL_IDX)
2218 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2219 if (RT_SUCCESS(rc))
2220 {
2221 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2222 *ppPage = pPage;
2223 if (pPage->cModifications)
2224 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2225 STAM_COUNTER_INC(&pPool->StatCacheHits);
2226 return VINF_PGM_CACHED_PAGE;
2227 }
2228 return rc;
2229 }
2230
2231 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2232 {
2233 /*
2234 * The kind is different. In some cases we should now flush the page
2235 * as it has been reused, but in most cases this is normal remapping
2236 * of PDs as PT or big pages using the GCPhys field in a slightly
2237 * different way than the other kinds.
2238 */
2239 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2240 {
2241 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2242 pgmPoolFlushPage(pPool, pPage);
2243 break;
2244 }
2245 }
2246 }
2247
2248 /* next */
2249 i = pPage->iNext;
2250 } while (i != NIL_PGMPOOL_IDX);
2251 }
2252
2253 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2254 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2255 return VERR_FILE_NOT_FOUND;
2256}
2257
2258
2259/**
2260 * Inserts a page into the cache.
2261 *
2262 * @param pPool The pool.
2263 * @param pPage The cached page.
2264 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2265 */
2266static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2267{
2268 /*
2269 * Insert into the GCPhys hash if the page is fit for that.
2270 */
2271 Assert(!pPage->fCached);
2272 if (fCanBeCached)
2273 {
2274 pPage->fCached = true;
2275 pgmPoolHashInsert(pPool, pPage);
2276 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2277 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2278 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2279 }
2280 else
2281 {
2282 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2283 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2284 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2285 }
2286
2287 /*
2288 * Insert at the head of the age list.
2289 */
2290 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2291 pPage->iAgeNext = pPool->iAgeHead;
2292 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2293 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2294 else
2295 pPool->iAgeTail = pPage->idx;
2296 pPool->iAgeHead = pPage->idx;
2297}
2298
2299
2300/**
2301 * Flushes a cached page.
2302 *
2303 * @param pPool The pool.
2304 * @param pPage The cached page.
2305 */
2306static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2307{
2308 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2309
2310 /*
2311 * Remove the page from the hash.
2312 */
2313 if (pPage->fCached)
2314 {
2315 pPage->fCached = false;
2316 pgmPoolHashRemove(pPool, pPage);
2317 }
2318 else
2319 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2320
2321 /*
2322 * Remove it from the age list.
2323 */
2324 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2325 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2326 else
2327 pPool->iAgeTail = pPage->iAgePrev;
2328 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2329 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2330 else
2331 pPool->iAgeHead = pPage->iAgeNext;
2332 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2333 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2334}
2335
2336
2337/**
2338 * Looks for pages sharing the monitor.
2339 *
2340 * @returns Pointer to the head page.
2341 * @returns NULL if not found.
2342 * @param pPool The Pool
2343 * @param pNewPage The page which is going to be monitored.
2344 */
2345static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2346{
2347 /*
2348 * Look up the GCPhys in the hash.
2349 */
2350 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2351 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2352 if (i == NIL_PGMPOOL_IDX)
2353 return NULL;
2354 do
2355 {
2356 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2357 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2358 && pPage != pNewPage)
2359 {
2360 switch (pPage->enmKind)
2361 {
2362 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2363 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2364 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2365 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2366 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2367 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2368 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2369 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2370 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2371 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2372 case PGMPOOLKIND_64BIT_PML4:
2373 case PGMPOOLKIND_32BIT_PD:
2374 case PGMPOOLKIND_PAE_PDPT:
2375 {
2376 /* find the head */
2377 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2378 {
2379 Assert(pPage->iMonitoredPrev != pPage->idx);
2380 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2381 }
2382 return pPage;
2383 }
2384
2385 /* ignore, no monitoring. */
2386 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2387 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2388 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2389 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2390 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2391 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2392 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2393 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2394 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2395 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2396 case PGMPOOLKIND_ROOT_NESTED:
2397 case PGMPOOLKIND_PAE_PD_PHYS:
2398 case PGMPOOLKIND_PAE_PDPT_PHYS:
2399 case PGMPOOLKIND_32BIT_PD_PHYS:
2400 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2401 break;
2402 default:
2403 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2404 }
2405 }
2406
2407 /* next */
2408 i = pPage->iNext;
2409 } while (i != NIL_PGMPOOL_IDX);
2410 return NULL;
2411}
2412
2413
2414/**
2415 * Enabled write monitoring of a guest page.
2416 *
2417 * @returns VBox status code.
2418 * @retval VINF_SUCCESS on success.
2419 * @param pPool The pool.
2420 * @param pPage The cached page.
2421 */
2422static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2423{
2424 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2425
2426 /*
2427 * Filter out the relevant kinds.
2428 */
2429 switch (pPage->enmKind)
2430 {
2431 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2432 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2433 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2434 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2435 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2436 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2437 case PGMPOOLKIND_64BIT_PML4:
2438 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2439 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2440 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2441 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2442 case PGMPOOLKIND_32BIT_PD:
2443 case PGMPOOLKIND_PAE_PDPT:
2444 break;
2445
2446 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2447 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2448 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2449 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2450 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2451 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2452 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2453 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2454 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2455 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2456 case PGMPOOLKIND_ROOT_NESTED:
2457 /* Nothing to monitor here. */
2458 return VINF_SUCCESS;
2459
2460 case PGMPOOLKIND_32BIT_PD_PHYS:
2461 case PGMPOOLKIND_PAE_PDPT_PHYS:
2462 case PGMPOOLKIND_PAE_PD_PHYS:
2463 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2464 /* Nothing to monitor here. */
2465 return VINF_SUCCESS;
2466 default:
2467 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2468 }
2469
2470 /*
2471 * Install handler.
2472 */
2473 int rc;
2474 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2475 if (pPageHead)
2476 {
2477 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2478 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2479
2480#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2481 if (pPageHead->fDirty)
2482 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2483#endif
2484
2485 pPage->iMonitoredPrev = pPageHead->idx;
2486 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2487 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2488 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2489 pPageHead->iMonitoredNext = pPage->idx;
2490 rc = VINF_SUCCESS;
2491 }
2492 else
2493 {
2494 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2495 PVM pVM = pPool->CTX_SUFF(pVM);
2496 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2497 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2498 GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK,
2499 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2500 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2501 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2502 pPool->pszAccessHandler);
2503 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2504 * the heap size should suffice. */
2505 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2506 PVMCPU pVCpu = VMMGetCpu(pVM);
2507 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2508 }
2509 pPage->fMonitored = true;
2510 return rc;
2511}
2512
2513
2514/**
2515 * Disables write monitoring of a guest page.
2516 *
2517 * @returns VBox status code.
2518 * @retval VINF_SUCCESS on success.
2519 * @param pPool The pool.
2520 * @param pPage The cached page.
2521 */
2522static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2523{
2524 /*
2525 * Filter out the relevant kinds.
2526 */
2527 switch (pPage->enmKind)
2528 {
2529 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2530 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2531 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2532 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2533 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2534 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2535 case PGMPOOLKIND_64BIT_PML4:
2536 case PGMPOOLKIND_32BIT_PD:
2537 case PGMPOOLKIND_PAE_PDPT:
2538 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2539 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2540 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2541 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2542 break;
2543
2544 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2545 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2546 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2547 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2548 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2549 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2550 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2551 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2552 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2553 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2554 case PGMPOOLKIND_ROOT_NESTED:
2555 case PGMPOOLKIND_PAE_PD_PHYS:
2556 case PGMPOOLKIND_PAE_PDPT_PHYS:
2557 case PGMPOOLKIND_32BIT_PD_PHYS:
2558 /* Nothing to monitor here. */
2559 Assert(!pPage->fMonitored);
2560 return VINF_SUCCESS;
2561
2562 default:
2563 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2564 }
2565 Assert(pPage->fMonitored);
2566
2567 /*
2568 * Remove the page from the monitored list or uninstall it if last.
2569 */
2570 const PVM pVM = pPool->CTX_SUFF(pVM);
2571 int rc;
2572 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2573 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2574 {
2575 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2576 {
2577 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2578 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2579 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2580 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2581 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2582 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2583 pPool->pszAccessHandler);
2584 AssertFatalRCSuccess(rc);
2585 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2586 }
2587 else
2588 {
2589 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2590 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2591 {
2592 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2593 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2594 }
2595 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2596 rc = VINF_SUCCESS;
2597 }
2598 }
2599 else
2600 {
2601 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2602 AssertFatalRC(rc);
2603 PVMCPU pVCpu = VMMGetCpu(pVM);
2604 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2605 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2606 }
2607 pPage->fMonitored = false;
2608
2609 /*
2610 * Remove it from the list of modified pages (if in it).
2611 */
2612 pgmPoolMonitorModifiedRemove(pPool, pPage);
2613
2614 return rc;
2615}
2616
2617
2618/**
2619 * Inserts the page into the list of modified pages.
2620 *
2621 * @param pPool The pool.
2622 * @param pPage The page.
2623 */
2624void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2625{
2626 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2627 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2628 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2629 && pPool->iModifiedHead != pPage->idx,
2630 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2631 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2632 pPool->iModifiedHead, pPool->cModifiedPages));
2633
2634 pPage->iModifiedNext = pPool->iModifiedHead;
2635 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2636 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2637 pPool->iModifiedHead = pPage->idx;
2638 pPool->cModifiedPages++;
2639#ifdef VBOX_WITH_STATISTICS
2640 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2641 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2642#endif
2643}
2644
2645
2646/**
2647 * Removes the page from the list of modified pages and resets the
2648 * modification counter.
2649 *
2650 * @param pPool The pool.
2651 * @param pPage The page which is believed to be in the list of modified pages.
2652 */
2653static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2654{
2655 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2656 if (pPool->iModifiedHead == pPage->idx)
2657 {
2658 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2659 pPool->iModifiedHead = pPage->iModifiedNext;
2660 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2661 {
2662 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2663 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2664 }
2665 pPool->cModifiedPages--;
2666 }
2667 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2668 {
2669 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2670 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2671 {
2672 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2673 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2674 }
2675 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2676 pPool->cModifiedPages--;
2677 }
2678 else
2679 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2680 pPage->cModifications = 0;
2681}
2682
2683
2684/**
2685 * Zaps the list of modified pages, resetting their modification counters in the process.
2686 *
2687 * @param pVM Pointer to the VM.
2688 */
2689static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2690{
2691 pgmLock(pVM);
2692 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2693 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2694
2695 unsigned cPages = 0; NOREF(cPages);
2696
2697#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2698 pgmPoolResetDirtyPages(pVM);
2699#endif
2700
2701 uint16_t idx = pPool->iModifiedHead;
2702 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2703 while (idx != NIL_PGMPOOL_IDX)
2704 {
2705 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2706 idx = pPage->iModifiedNext;
2707 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2708 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2709 pPage->cModifications = 0;
2710 Assert(++cPages);
2711 }
2712 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2713 pPool->cModifiedPages = 0;
2714 pgmUnlock(pVM);
2715}
2716
2717
2718/**
2719 * Handle SyncCR3 pool tasks
2720 *
2721 * @returns VBox status code.
2722 * @retval VINF_SUCCESS if successfully added.
2723 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
2724 * @param pVCpu Pointer to the VMCPU.
2725 * @remark Should only be used when monitoring is available, thus placed in
2726 * the PGMPOOL_WITH_MONITORING #ifdef.
2727 */
2728int pgmPoolSyncCR3(PVMCPU pVCpu)
2729{
2730 PVM pVM = pVCpu->CTX_SUFF(pVM);
2731 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2732
2733 /*
2734 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2735 * Occasionally we will have to clear all the shadow page tables because we wanted
2736 * to monitor a page which was mapped by too many shadowed page tables. This operation
2737 * sometimes referred to as a 'lightweight flush'.
2738 */
2739# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2740 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2741 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2742# else /* !IN_RING3 */
2743 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2744 {
2745 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2746 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2747
2748 /* Make sure all other VCPUs return to ring 3. */
2749 if (pVM->cCpus > 1)
2750 {
2751 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2752 PGM_INVL_ALL_VCPU_TLBS(pVM);
2753 }
2754 return VINF_PGM_SYNC_CR3;
2755 }
2756# endif /* !IN_RING3 */
2757 else
2758 {
2759 pgmPoolMonitorModifiedClearAll(pVM);
2760
2761 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2762 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2763 {
2764 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2765 return pgmPoolSyncCR3(pVCpu);
2766 }
2767 }
2768 return VINF_SUCCESS;
2769}
2770
2771
2772/**
2773 * Frees up at least one user entry.
2774 *
2775 * @returns VBox status code.
2776 * @retval VINF_SUCCESS if successfully added.
2777 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2778 * @param pPool The pool.
2779 * @param iUser The user index.
2780 */
2781static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2782{
2783 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2784 /*
2785 * Just free cached pages in a braindead fashion.
2786 */
2787 /** @todo walk the age list backwards and free the first with usage. */
2788 int rc = VINF_SUCCESS;
2789 do
2790 {
2791 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2792 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2793 rc = rc2;
2794 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2795 return rc;
2796}
2797
2798
2799/**
2800 * Inserts a page into the cache.
2801 *
2802 * This will create user node for the page, insert it into the GCPhys
2803 * hash, and insert it into the age list.
2804 *
2805 * @returns VBox status code.
2806 * @retval VINF_SUCCESS if successfully added.
2807 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2808 * @param pPool The pool.
2809 * @param pPage The cached page.
2810 * @param GCPhys The GC physical address of the page we're gonna shadow.
2811 * @param iUser The user index.
2812 * @param iUserTable The user table index.
2813 */
2814DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2815{
2816 int rc = VINF_SUCCESS;
2817 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2818
2819 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2820
2821 if (iUser != NIL_PGMPOOL_IDX)
2822 {
2823#ifdef VBOX_STRICT
2824 /*
2825 * Check that the entry doesn't already exists.
2826 */
2827 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2828 {
2829 uint16_t i = pPage->iUserHead;
2830 do
2831 {
2832 Assert(i < pPool->cMaxUsers);
2833 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2834 i = paUsers[i].iNext;
2835 } while (i != NIL_PGMPOOL_USER_INDEX);
2836 }
2837#endif
2838
2839 /*
2840 * Find free a user node.
2841 */
2842 uint16_t i = pPool->iUserFreeHead;
2843 if (i == NIL_PGMPOOL_USER_INDEX)
2844 {
2845 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2846 if (RT_FAILURE(rc))
2847 return rc;
2848 i = pPool->iUserFreeHead;
2849 }
2850
2851 /*
2852 * Unlink the user node from the free list,
2853 * initialize and insert it into the user list.
2854 */
2855 pPool->iUserFreeHead = paUsers[i].iNext;
2856 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2857 paUsers[i].iUser = iUser;
2858 paUsers[i].iUserTable = iUserTable;
2859 pPage->iUserHead = i;
2860 }
2861 else
2862 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2863
2864
2865 /*
2866 * Insert into cache and enable monitoring of the guest page if enabled.
2867 *
2868 * Until we implement caching of all levels, including the CR3 one, we'll
2869 * have to make sure we don't try monitor & cache any recursive reuse of
2870 * a monitored CR3 page. Because all windows versions are doing this we'll
2871 * have to be able to do combined access monitoring, CR3 + PT and
2872 * PD + PT (guest PAE).
2873 *
2874 * Update:
2875 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2876 */
2877 const bool fCanBeMonitored = true;
2878 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2879 if (fCanBeMonitored)
2880 {
2881 rc = pgmPoolMonitorInsert(pPool, pPage);
2882 AssertRC(rc);
2883 }
2884 return rc;
2885}
2886
2887
2888/**
2889 * Adds a user reference to a page.
2890 *
2891 * This will move the page to the head of the
2892 *
2893 * @returns VBox status code.
2894 * @retval VINF_SUCCESS if successfully added.
2895 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2896 * @param pPool The pool.
2897 * @param pPage The cached page.
2898 * @param iUser The user index.
2899 * @param iUserTable The user table.
2900 */
2901static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2902{
2903 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2904 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2905 Assert(iUser != NIL_PGMPOOL_IDX);
2906
2907# ifdef VBOX_STRICT
2908 /*
2909 * Check that the entry doesn't already exists. We only allow multiple
2910 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2911 */
2912 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2913 {
2914 uint16_t i = pPage->iUserHead;
2915 do
2916 {
2917 Assert(i < pPool->cMaxUsers);
2918 /** @todo this assertion looks odd... Shouldn't it be && here? */
2919 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2920 i = paUsers[i].iNext;
2921 } while (i != NIL_PGMPOOL_USER_INDEX);
2922 }
2923# endif
2924
2925 /*
2926 * Allocate a user node.
2927 */
2928 uint16_t i = pPool->iUserFreeHead;
2929 if (i == NIL_PGMPOOL_USER_INDEX)
2930 {
2931 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2932 if (RT_FAILURE(rc))
2933 return rc;
2934 i = pPool->iUserFreeHead;
2935 }
2936 pPool->iUserFreeHead = paUsers[i].iNext;
2937
2938 /*
2939 * Initialize the user node and insert it.
2940 */
2941 paUsers[i].iNext = pPage->iUserHead;
2942 paUsers[i].iUser = iUser;
2943 paUsers[i].iUserTable = iUserTable;
2944 pPage->iUserHead = i;
2945
2946# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2947 if (pPage->fDirty)
2948 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2949# endif
2950
2951 /*
2952 * Tell the cache to update its replacement stats for this page.
2953 */
2954 pgmPoolCacheUsed(pPool, pPage);
2955 return VINF_SUCCESS;
2956}
2957
2958
2959/**
2960 * Frees a user record associated with a page.
2961 *
2962 * This does not clear the entry in the user table, it simply replaces the
2963 * user record to the chain of free records.
2964 *
2965 * @param pPool The pool.
2966 * @param HCPhys The HC physical address of the shadow page.
2967 * @param iUser The shadow page pool index of the user table.
2968 * @param iUserTable The index into the user table (shadowed).
2969 *
2970 * @remarks Don't call this for root pages.
2971 */
2972static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2973{
2974 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2975 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2976 Assert(iUser != NIL_PGMPOOL_IDX);
2977
2978 /*
2979 * Unlink and free the specified user entry.
2980 */
2981
2982 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2983 uint16_t i = pPage->iUserHead;
2984 if ( i != NIL_PGMPOOL_USER_INDEX
2985 && paUsers[i].iUser == iUser
2986 && paUsers[i].iUserTable == iUserTable)
2987 {
2988 pPage->iUserHead = paUsers[i].iNext;
2989
2990 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2991 paUsers[i].iNext = pPool->iUserFreeHead;
2992 pPool->iUserFreeHead = i;
2993 return;
2994 }
2995
2996 /* General: Linear search. */
2997 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2998 while (i != NIL_PGMPOOL_USER_INDEX)
2999 {
3000 if ( paUsers[i].iUser == iUser
3001 && paUsers[i].iUserTable == iUserTable)
3002 {
3003 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3004 paUsers[iPrev].iNext = paUsers[i].iNext;
3005 else
3006 pPage->iUserHead = paUsers[i].iNext;
3007
3008 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3009 paUsers[i].iNext = pPool->iUserFreeHead;
3010 pPool->iUserFreeHead = i;
3011 return;
3012 }
3013 iPrev = i;
3014 i = paUsers[i].iNext;
3015 }
3016
3017 /* Fatal: didn't find it */
3018 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3019 iUser, iUserTable, pPage->GCPhys));
3020}
3021
3022
3023/**
3024 * Gets the entry size of a shadow table.
3025 *
3026 * @param enmKind The kind of page.
3027 *
3028 * @returns The size of the entry in bytes. That is, 4 or 8.
3029 * @returns If the kind is not for a table, an assertion is raised and 0 is
3030 * returned.
3031 */
3032DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3033{
3034 switch (enmKind)
3035 {
3036 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3037 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3038 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3039 case PGMPOOLKIND_32BIT_PD:
3040 case PGMPOOLKIND_32BIT_PD_PHYS:
3041 return 4;
3042
3043 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3044 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3045 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3046 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3047 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3048 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3049 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3050 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3051 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3052 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3053 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3054 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3055 case PGMPOOLKIND_64BIT_PML4:
3056 case PGMPOOLKIND_PAE_PDPT:
3057 case PGMPOOLKIND_ROOT_NESTED:
3058 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3059 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3060 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3061 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3062 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3063 case PGMPOOLKIND_PAE_PD_PHYS:
3064 case PGMPOOLKIND_PAE_PDPT_PHYS:
3065 return 8;
3066
3067 default:
3068 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3069 }
3070}
3071
3072
3073/**
3074 * Gets the entry size of a guest table.
3075 *
3076 * @param enmKind The kind of page.
3077 *
3078 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3079 * @returns If the kind is not for a table, an assertion is raised and 0 is
3080 * returned.
3081 */
3082DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3083{
3084 switch (enmKind)
3085 {
3086 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3087 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3088 case PGMPOOLKIND_32BIT_PD:
3089 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3090 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3091 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3092 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3093 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3094 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3095 return 4;
3096
3097 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3098 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3099 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3100 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3101 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3102 case PGMPOOLKIND_64BIT_PML4:
3103 case PGMPOOLKIND_PAE_PDPT:
3104 return 8;
3105
3106 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3107 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3108 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3109 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3110 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3111 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3112 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3113 case PGMPOOLKIND_ROOT_NESTED:
3114 case PGMPOOLKIND_PAE_PD_PHYS:
3115 case PGMPOOLKIND_PAE_PDPT_PHYS:
3116 case PGMPOOLKIND_32BIT_PD_PHYS:
3117 /** @todo can we return 0? (nobody is calling this...) */
3118 AssertFailed();
3119 return 0;
3120
3121 default:
3122 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3123 }
3124}
3125
3126
3127/**
3128 * Checks one shadow page table entry for a mapping of a physical page.
3129 *
3130 * @returns true / false indicating removal of all relevant PTEs
3131 *
3132 * @param pVM Pointer to the VM.
3133 * @param pPhysPage The guest page in question.
3134 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3135 * @param iShw The shadow page table.
3136 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3137 */
3138static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3139{
3140 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3141 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3142 bool fRet = false;
3143
3144 /*
3145 * Assert sanity.
3146 */
3147 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3148 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3149 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3150
3151 /*
3152 * Then, clear the actual mappings to the page in the shadow PT.
3153 */
3154 switch (pPage->enmKind)
3155 {
3156 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3157 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3158 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3159 {
3160 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3161 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3162 uint32_t u32AndMask = 0;
3163 uint32_t u32OrMask = 0;
3164
3165 if (!fFlushPTEs)
3166 {
3167 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3168 {
3169 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3170 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3171 u32OrMask = X86_PTE_RW;
3172 u32AndMask = UINT32_MAX;
3173 fRet = true;
3174 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3175 break;
3176
3177 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3178 u32OrMask = 0;
3179 u32AndMask = ~X86_PTE_RW;
3180 fRet = true;
3181 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3182 break;
3183 default:
3184 /* (shouldn't be here, will assert below) */
3185 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3186 break;
3187 }
3188 }
3189 else
3190 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3191
3192 /* Update the counter if we're removing references. */
3193 if (!u32AndMask)
3194 {
3195 Assert(pPage->cPresent);
3196 Assert(pPool->cPresent);
3197 pPage->cPresent--;
3198 pPool->cPresent--;
3199 }
3200
3201 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3202 {
3203 X86PTE Pte;
3204
3205 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3206 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3207 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3208 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3209
3210 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3211 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3212 return fRet;
3213 }
3214#ifdef LOG_ENABLED
3215 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3216 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3217 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3218 {
3219 Log(("i=%d cFound=%d\n", i, ++cFound));
3220 }
3221#endif
3222 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3223 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3224 break;
3225 }
3226
3227 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3228 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3229 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3230 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3231 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3232 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3233 {
3234 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3235 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3236 uint64_t u64OrMask = 0;
3237 uint64_t u64AndMask = 0;
3238
3239 if (!fFlushPTEs)
3240 {
3241 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3242 {
3243 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3244 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3245 u64OrMask = X86_PTE_RW;
3246 u64AndMask = UINT64_MAX;
3247 fRet = true;
3248 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3249 break;
3250
3251 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3252 u64OrMask = 0;
3253 u64AndMask = ~(uint64_t)X86_PTE_RW;
3254 fRet = true;
3255 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3256 break;
3257
3258 default:
3259 /* (shouldn't be here, will assert below) */
3260 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3261 break;
3262 }
3263 }
3264 else
3265 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3266
3267 /* Update the counter if we're removing references. */
3268 if (!u64AndMask)
3269 {
3270 Assert(pPage->cPresent);
3271 Assert(pPool->cPresent);
3272 pPage->cPresent--;
3273 pPool->cPresent--;
3274 }
3275
3276 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3277 {
3278 X86PTEPAE Pte;
3279
3280 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3281 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3282 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3283 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3284
3285 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3286 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3287 return fRet;
3288 }
3289#ifdef LOG_ENABLED
3290 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3291 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3292 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3293 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3294 Log(("i=%d cFound=%d\n", i, ++cFound));
3295#endif
3296 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3297 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3298 break;
3299 }
3300
3301#ifdef PGM_WITH_LARGE_PAGES
3302 /* Large page case only. */
3303 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3304 {
3305 Assert(pVM->pgm.s.fNestedPaging);
3306
3307 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3308 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3309
3310 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3311 {
3312 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3313 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3314 pPD->a[iPte].u = 0;
3315 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3316
3317 /* Update the counter as we're removing references. */
3318 Assert(pPage->cPresent);
3319 Assert(pPool->cPresent);
3320 pPage->cPresent--;
3321 pPool->cPresent--;
3322
3323 return fRet;
3324 }
3325# ifdef LOG_ENABLED
3326 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3327 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3328 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3329 Log(("i=%d cFound=%d\n", i, ++cFound));
3330# endif
3331 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3332 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3333 break;
3334 }
3335
3336 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3337 case PGMPOOLKIND_PAE_PD_PHYS:
3338 {
3339 Assert(pVM->pgm.s.fNestedPaging);
3340
3341 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3342 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3343
3344 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3345 {
3346 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3347 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3348 pPD->a[iPte].u = 0;
3349 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3350
3351 /* Update the counter as we're removing references. */
3352 Assert(pPage->cPresent);
3353 Assert(pPool->cPresent);
3354 pPage->cPresent--;
3355 pPool->cPresent--;
3356 return fRet;
3357 }
3358# ifdef LOG_ENABLED
3359 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3360 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3361 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3362 Log(("i=%d cFound=%d\n", i, ++cFound));
3363# endif
3364 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3365 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3366 break;
3367 }
3368#endif /* PGM_WITH_LARGE_PAGES */
3369
3370 default:
3371 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3372 }
3373
3374 /* not reached. */
3375#ifndef _MSC_VER
3376 return fRet;
3377#endif
3378}
3379
3380
3381/**
3382 * Scans one shadow page table for mappings of a physical page.
3383 *
3384 * @param pVM Pointer to the VM.
3385 * @param pPhysPage The guest page in question.
3386 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3387 * @param iShw The shadow page table.
3388 */
3389static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3390{
3391 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3392
3393 /* We should only come here with when there's only one reference to this physical page. */
3394 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3395
3396 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3397 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3398 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3399 if (!fKeptPTEs)
3400 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3401 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3402}
3403
3404
3405/**
3406 * Flushes a list of shadow page tables mapping the same physical page.
3407 *
3408 * @param pVM Pointer to the VM.
3409 * @param pPhysPage The guest page in question.
3410 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3411 * @param iPhysExt The physical cross reference extent list to flush.
3412 */
3413static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3414{
3415 PGM_LOCK_ASSERT_OWNER(pVM);
3416 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3417 bool fKeepList = false;
3418
3419 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3420 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3421
3422 const uint16_t iPhysExtStart = iPhysExt;
3423 PPGMPOOLPHYSEXT pPhysExt;
3424 do
3425 {
3426 Assert(iPhysExt < pPool->cMaxPhysExts);
3427 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3428 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3429 {
3430 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3431 {
3432 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3433 if (!fKeptPTEs)
3434 {
3435 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3436 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3437 }
3438 else
3439 fKeepList = true;
3440 }
3441 }
3442 /* next */
3443 iPhysExt = pPhysExt->iNext;
3444 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3445
3446 if (!fKeepList)
3447 {
3448 /* insert the list into the free list and clear the ram range entry. */
3449 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3450 pPool->iPhysExtFreeHead = iPhysExtStart;
3451 /* Invalidate the tracking data. */
3452 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3453 }
3454
3455 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3456}
3457
3458
3459/**
3460 * Flushes all shadow page table mappings of the given guest page.
3461 *
3462 * This is typically called when the host page backing the guest one has been
3463 * replaced or when the page protection was changed due to a guest access
3464 * caught by the monitoring.
3465 *
3466 * @returns VBox status code.
3467 * @retval VINF_SUCCESS if all references has been successfully cleared.
3468 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3469 * pool cleaning. FF and sync flags are set.
3470 *
3471 * @param pVM Pointer to the VM.
3472 * @param GCPhysPage GC physical address of the page in question
3473 * @param pPhysPage The guest page in question.
3474 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3475 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3476 * flushed, it is NOT touched if this isn't necessary.
3477 * The caller MUST initialized this to @a false.
3478 */
3479int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3480{
3481 PVMCPU pVCpu = VMMGetCpu(pVM);
3482 pgmLock(pVM);
3483 int rc = VINF_SUCCESS;
3484
3485#ifdef PGM_WITH_LARGE_PAGES
3486 /* Is this page part of a large page? */
3487 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3488 {
3489 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3490 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3491
3492 /* Fetch the large page base. */
3493 PPGMPAGE pLargePage;
3494 if (GCPhysBase != GCPhysPage)
3495 {
3496 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3497 AssertFatal(pLargePage);
3498 }
3499 else
3500 pLargePage = pPhysPage;
3501
3502 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3503
3504 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3505 {
3506 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3507 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3508 pVM->pgm.s.cLargePagesDisabled++;
3509
3510 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3511 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3512
3513 *pfFlushTLBs = true;
3514 pgmUnlock(pVM);
3515 return rc;
3516 }
3517 }
3518#else
3519 NOREF(GCPhysPage);
3520#endif /* PGM_WITH_LARGE_PAGES */
3521
3522 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3523 if (u16)
3524 {
3525 /*
3526 * The zero page is currently screwing up the tracking and we'll
3527 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3528 * is defined, zero pages won't normally be mapped. Some kind of solution
3529 * will be needed for this problem of course, but it will have to wait...
3530 */
3531 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3532 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3533 rc = VINF_PGM_GCPHYS_ALIASED;
3534 else
3535 {
3536# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3537 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3538 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3539 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3540# endif
3541
3542 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3543 {
3544 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3545 pgmPoolTrackFlushGCPhysPT(pVM,
3546 pPhysPage,
3547 fFlushPTEs,
3548 PGMPOOL_TD_GET_IDX(u16));
3549 }
3550 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3551 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3552 else
3553 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3554 *pfFlushTLBs = true;
3555
3556# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3557 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3558# endif
3559 }
3560 }
3561
3562 if (rc == VINF_PGM_GCPHYS_ALIASED)
3563 {
3564 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3565 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3566 rc = VINF_PGM_SYNC_CR3;
3567 }
3568 pgmUnlock(pVM);
3569 return rc;
3570}
3571
3572
3573/**
3574 * Scans all shadow page tables for mappings of a physical page.
3575 *
3576 * This may be slow, but it's most likely more efficient than cleaning
3577 * out the entire page pool / cache.
3578 *
3579 * @returns VBox status code.
3580 * @retval VINF_SUCCESS if all references has been successfully cleared.
3581 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3582 * a page pool cleaning.
3583 *
3584 * @param pVM Pointer to the VM.
3585 * @param pPhysPage The guest page in question.
3586 */
3587int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3588{
3589 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3590 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3591 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3592 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3593
3594 /*
3595 * There is a limit to what makes sense.
3596 */
3597 if ( pPool->cPresent > 1024
3598 && pVM->cCpus == 1)
3599 {
3600 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3601 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3602 return VINF_PGM_GCPHYS_ALIASED;
3603 }
3604
3605 /*
3606 * Iterate all the pages until we've encountered all that in use.
3607 * This is simple but not quite optimal solution.
3608 */
3609 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3610 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3611 unsigned cLeft = pPool->cUsedPages;
3612 unsigned iPage = pPool->cCurPages;
3613 while (--iPage >= PGMPOOL_IDX_FIRST)
3614 {
3615 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3616 if ( pPage->GCPhys != NIL_RTGCPHYS
3617 && pPage->cPresent)
3618 {
3619 switch (pPage->enmKind)
3620 {
3621 /*
3622 * We only care about shadow page tables.
3623 */
3624 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3625 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3626 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3627 {
3628 unsigned cPresent = pPage->cPresent;
3629 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3630 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3631 if (pPT->a[i].n.u1Present)
3632 {
3633 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3634 {
3635 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3636 pPT->a[i].u = 0;
3637
3638 /* Update the counter as we're removing references. */
3639 Assert(pPage->cPresent);
3640 Assert(pPool->cPresent);
3641 pPage->cPresent--;
3642 pPool->cPresent--;
3643 }
3644 if (!--cPresent)
3645 break;
3646 }
3647 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3648 break;
3649 }
3650
3651 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3652 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3653 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3654 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3655 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3656 {
3657 unsigned cPresent = pPage->cPresent;
3658 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3659 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3660 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3661 {
3662 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3663 {
3664 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3665 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3666
3667 /* Update the counter as we're removing references. */
3668 Assert(pPage->cPresent);
3669 Assert(pPool->cPresent);
3670 pPage->cPresent--;
3671 pPool->cPresent--;
3672 }
3673 if (!--cPresent)
3674 break;
3675 }
3676 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3677 break;
3678 }
3679#ifndef IN_RC
3680 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3681 {
3682 unsigned cPresent = pPage->cPresent;
3683 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3684 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3685 if (pPT->a[i].n.u1Present)
3686 {
3687 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3688 {
3689 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3690 pPT->a[i].u = 0;
3691
3692 /* Update the counter as we're removing references. */
3693 Assert(pPage->cPresent);
3694 Assert(pPool->cPresent);
3695 pPage->cPresent--;
3696 pPool->cPresent--;
3697 }
3698 if (!--cPresent)
3699 break;
3700 }
3701 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3702 break;
3703 }
3704#endif
3705 }
3706 if (!--cLeft)
3707 break;
3708 }
3709 }
3710
3711 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3712 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3713
3714 /*
3715 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3716 */
3717 if (pPool->cPresent > 1024)
3718 {
3719 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3720 return VINF_PGM_GCPHYS_ALIASED;
3721 }
3722
3723 return VINF_SUCCESS;
3724}
3725
3726
3727/**
3728 * Clears the user entry in a user table.
3729 *
3730 * This is used to remove all references to a page when flushing it.
3731 */
3732static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3733{
3734 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3735 Assert(pUser->iUser < pPool->cCurPages);
3736 uint32_t iUserTable = pUser->iUserTable;
3737
3738 /*
3739 * Map the user page. Ignore references made by fictitious pages.
3740 */
3741 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3742 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3743 union
3744 {
3745 uint64_t *pau64;
3746 uint32_t *pau32;
3747 } u;
3748 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3749 {
3750 Assert(!pUserPage->pvPageR3);
3751 return;
3752 }
3753 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3754
3755
3756 /* Safety precaution in case we change the paging for other modes too in the future. */
3757 Assert(!pgmPoolIsPageLocked(pPage));
3758
3759#ifdef VBOX_STRICT
3760 /*
3761 * Some sanity checks.
3762 */
3763 switch (pUserPage->enmKind)
3764 {
3765 case PGMPOOLKIND_32BIT_PD:
3766 case PGMPOOLKIND_32BIT_PD_PHYS:
3767 Assert(iUserTable < X86_PG_ENTRIES);
3768 break;
3769 case PGMPOOLKIND_PAE_PDPT:
3770 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3771 case PGMPOOLKIND_PAE_PDPT_PHYS:
3772 Assert(iUserTable < 4);
3773 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3774 break;
3775 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3776 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3777 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3778 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3779 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3780 case PGMPOOLKIND_PAE_PD_PHYS:
3781 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3782 break;
3783 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3784 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3785 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3786 break;
3787 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3788 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3789 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3790 break;
3791 case PGMPOOLKIND_64BIT_PML4:
3792 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3793 /* GCPhys >> PAGE_SHIFT is the index here */
3794 break;
3795 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3796 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3797 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3798 break;
3799
3800 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3801 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3802 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3803 break;
3804
3805 case PGMPOOLKIND_ROOT_NESTED:
3806 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3807 break;
3808
3809 default:
3810 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3811 break;
3812 }
3813#endif /* VBOX_STRICT */
3814
3815 /*
3816 * Clear the entry in the user page.
3817 */
3818 switch (pUserPage->enmKind)
3819 {
3820 /* 32-bit entries */
3821 case PGMPOOLKIND_32BIT_PD:
3822 case PGMPOOLKIND_32BIT_PD_PHYS:
3823 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3824 break;
3825
3826 /* 64-bit entries */
3827 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3828 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3829 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3830 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3831 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3832#ifdef IN_RC
3833 /*
3834 * In 32 bits PAE mode we *must* invalidate the TLB when changing a
3835 * PDPT entry; the CPU fetches them only during cr3 load, so any
3836 * non-present PDPT will continue to cause page faults.
3837 */
3838 ASMReloadCR3();
3839 /* no break */
3840#endif
3841 case PGMPOOLKIND_PAE_PD_PHYS:
3842 case PGMPOOLKIND_PAE_PDPT_PHYS:
3843 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3844 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3845 case PGMPOOLKIND_64BIT_PML4:
3846 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3847 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3848 case PGMPOOLKIND_PAE_PDPT:
3849 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3850 case PGMPOOLKIND_ROOT_NESTED:
3851 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3852 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3853 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3854 break;
3855
3856 default:
3857 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3858 }
3859 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3860}
3861
3862
3863/**
3864 * Clears all users of a page.
3865 */
3866static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3867{
3868 /*
3869 * Free all the user records.
3870 */
3871 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3872
3873 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3874 uint16_t i = pPage->iUserHead;
3875 while (i != NIL_PGMPOOL_USER_INDEX)
3876 {
3877 /* Clear enter in user table. */
3878 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3879
3880 /* Free it. */
3881 const uint16_t iNext = paUsers[i].iNext;
3882 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3883 paUsers[i].iNext = pPool->iUserFreeHead;
3884 pPool->iUserFreeHead = i;
3885
3886 /* Next. */
3887 i = iNext;
3888 }
3889 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3890}
3891
3892
3893/**
3894 * Allocates a new physical cross reference extent.
3895 *
3896 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3897 * @param pVM Pointer to the VM.
3898 * @param piPhysExt Where to store the phys ext index.
3899 */
3900PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3901{
3902 PGM_LOCK_ASSERT_OWNER(pVM);
3903 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3904 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3905 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3906 {
3907 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3908 return NULL;
3909 }
3910 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3911 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3912 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3913 *piPhysExt = iPhysExt;
3914 return pPhysExt;
3915}
3916
3917
3918/**
3919 * Frees a physical cross reference extent.
3920 *
3921 * @param pVM Pointer to the VM.
3922 * @param iPhysExt The extent to free.
3923 */
3924void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3925{
3926 PGM_LOCK_ASSERT_OWNER(pVM);
3927 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3928 Assert(iPhysExt < pPool->cMaxPhysExts);
3929 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3930 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3931 {
3932 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3933 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3934 }
3935 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3936 pPool->iPhysExtFreeHead = iPhysExt;
3937}
3938
3939
3940/**
3941 * Frees a physical cross reference extent.
3942 *
3943 * @param pVM Pointer to the VM.
3944 * @param iPhysExt The extent to free.
3945 */
3946void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3947{
3948 PGM_LOCK_ASSERT_OWNER(pVM);
3949 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3950
3951 const uint16_t iPhysExtStart = iPhysExt;
3952 PPGMPOOLPHYSEXT pPhysExt;
3953 do
3954 {
3955 Assert(iPhysExt < pPool->cMaxPhysExts);
3956 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3957 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3958 {
3959 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3960 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3961 }
3962
3963 /* next */
3964 iPhysExt = pPhysExt->iNext;
3965 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3966
3967 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3968 pPool->iPhysExtFreeHead = iPhysExtStart;
3969}
3970
3971
3972/**
3973 * Insert a reference into a list of physical cross reference extents.
3974 *
3975 * @returns The new tracking data for PGMPAGE.
3976 *
3977 * @param pVM Pointer to the VM.
3978 * @param iPhysExt The physical extent index of the list head.
3979 * @param iShwPT The shadow page table index.
3980 * @param iPte Page table entry
3981 *
3982 */
3983static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3984{
3985 PGM_LOCK_ASSERT_OWNER(pVM);
3986 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3987 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3988
3989 /*
3990 * Special common cases.
3991 */
3992 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3993 {
3994 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3995 paPhysExts[iPhysExt].apte[1] = iPte;
3996 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3997 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3998 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3999 }
4000 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4001 {
4002 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4003 paPhysExts[iPhysExt].apte[2] = iPte;
4004 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4005 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4006 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4007 }
4008 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4009
4010 /*
4011 * General treatment.
4012 */
4013 const uint16_t iPhysExtStart = iPhysExt;
4014 unsigned cMax = 15;
4015 for (;;)
4016 {
4017 Assert(iPhysExt < pPool->cMaxPhysExts);
4018 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4019 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4020 {
4021 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4022 paPhysExts[iPhysExt].apte[i] = iPte;
4023 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4024 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4025 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4026 }
4027 if (!--cMax)
4028 {
4029 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4030 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4031 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4032 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4033 }
4034
4035 /* advance */
4036 iPhysExt = paPhysExts[iPhysExt].iNext;
4037 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4038 break;
4039 }
4040
4041 /*
4042 * Add another extent to the list.
4043 */
4044 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4045 if (!pNew)
4046 {
4047 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4048 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4049 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4050 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4051 }
4052 pNew->iNext = iPhysExtStart;
4053 pNew->aidx[0] = iShwPT;
4054 pNew->apte[0] = iPte;
4055 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4056 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4057}
4058
4059
4060/**
4061 * Add a reference to guest physical page where extents are in use.
4062 *
4063 * @returns The new tracking data for PGMPAGE.
4064 *
4065 * @param pVM Pointer to the VM.
4066 * @param pPhysPage Pointer to the aPages entry in the ram range.
4067 * @param u16 The ram range flags (top 16-bits).
4068 * @param iShwPT The shadow page table index.
4069 * @param iPte Page table entry
4070 */
4071uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4072{
4073 pgmLock(pVM);
4074 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4075 {
4076 /*
4077 * Convert to extent list.
4078 */
4079 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4080 uint16_t iPhysExt;
4081 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4082 if (pPhysExt)
4083 {
4084 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4085 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4086 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4087 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4088 pPhysExt->aidx[1] = iShwPT;
4089 pPhysExt->apte[1] = iPte;
4090 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4091 }
4092 else
4093 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4094 }
4095 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4096 {
4097 /*
4098 * Insert into the extent list.
4099 */
4100 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4101 }
4102 else
4103 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4104 pgmUnlock(pVM);
4105 return u16;
4106}
4107
4108
4109/**
4110 * Clear references to guest physical memory.
4111 *
4112 * @param pPool The pool.
4113 * @param pPage The page.
4114 * @param pPhysPage Pointer to the aPages entry in the ram range.
4115 * @param iPte Shadow PTE index
4116 */
4117void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4118{
4119 PVM pVM = pPool->CTX_SUFF(pVM);
4120 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4121 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4122
4123 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4124 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4125 {
4126 pgmLock(pVM);
4127
4128 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4129 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4130 do
4131 {
4132 Assert(iPhysExt < pPool->cMaxPhysExts);
4133
4134 /*
4135 * Look for the shadow page and check if it's all freed.
4136 */
4137 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4138 {
4139 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4140 && paPhysExts[iPhysExt].apte[i] == iPte)
4141 {
4142 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4143 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4144
4145 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4146 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4147 {
4148 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4149 pgmUnlock(pVM);
4150 return;
4151 }
4152
4153 /* we can free the node. */
4154 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4155 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4156 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4157 {
4158 /* lonely node */
4159 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4160 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4161 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4162 }
4163 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4164 {
4165 /* head */
4166 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4167 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4168 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4169 }
4170 else
4171 {
4172 /* in list */
4173 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4174 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4175 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4176 }
4177 iPhysExt = iPhysExtNext;
4178 pgmUnlock(pVM);
4179 return;
4180 }
4181 }
4182
4183 /* next */
4184 iPhysExtPrev = iPhysExt;
4185 iPhysExt = paPhysExts[iPhysExt].iNext;
4186 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4187
4188 pgmUnlock(pVM);
4189 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4190 }
4191 else /* nothing to do */
4192 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4193}
4194
4195/**
4196 * Clear references to guest physical memory.
4197 *
4198 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4199 * physical address is assumed to be correct, so the linear search can be
4200 * skipped and we can assert at an earlier point.
4201 *
4202 * @param pPool The pool.
4203 * @param pPage The page.
4204 * @param HCPhys The host physical address corresponding to the guest page.
4205 * @param GCPhys The guest physical address corresponding to HCPhys.
4206 * @param iPte Shadow PTE index
4207 */
4208static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4209{
4210 /*
4211 * Lookup the page and check if it checks out before derefing it.
4212 */
4213 PVM pVM = pPool->CTX_SUFF(pVM);
4214 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4215 if (pPhysPage)
4216 {
4217 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4218#ifdef LOG_ENABLED
4219 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4220 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4221#endif
4222 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4223 {
4224 Assert(pPage->cPresent);
4225 Assert(pPool->cPresent);
4226 pPage->cPresent--;
4227 pPool->cPresent--;
4228 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4229 return;
4230 }
4231
4232 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4233 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4234 }
4235 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4236}
4237
4238
4239/**
4240 * Clear references to guest physical memory.
4241 *
4242 * @param pPool The pool.
4243 * @param pPage The page.
4244 * @param HCPhys The host physical address corresponding to the guest page.
4245 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
4246 * @param iPte Shadow pte index
4247 */
4248void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4249{
4250 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4251
4252 /*
4253 * Try the hint first.
4254 */
4255 RTHCPHYS HCPhysHinted;
4256 PVM pVM = pPool->CTX_SUFF(pVM);
4257 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4258 if (pPhysPage)
4259 {
4260 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4261 Assert(HCPhysHinted);
4262 if (HCPhysHinted == HCPhys)
4263 {
4264 Assert(pPage->cPresent);
4265 Assert(pPool->cPresent);
4266 pPage->cPresent--;
4267 pPool->cPresent--;
4268 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4269 return;
4270 }
4271 }
4272 else
4273 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4274
4275 /*
4276 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4277 */
4278 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4279 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4280 while (pRam)
4281 {
4282 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4283 while (iPage-- > 0)
4284 {
4285 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4286 {
4287 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4288 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4289 Assert(pPage->cPresent);
4290 Assert(pPool->cPresent);
4291 pPage->cPresent--;
4292 pPool->cPresent--;
4293 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4294 return;
4295 }
4296 }
4297 pRam = pRam->CTX_SUFF(pNext);
4298 }
4299
4300 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4301}
4302
4303
4304/**
4305 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4306 *
4307 * @param pPool The pool.
4308 * @param pPage The page.
4309 * @param pShwPT The shadow page table (mapping of the page).
4310 * @param pGstPT The guest page table.
4311 */
4312DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4313{
4314 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4315 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4316 {
4317 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4318 if (pShwPT->a[i].n.u1Present)
4319 {
4320 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4321 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4322 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4323 if (!pPage->cPresent)
4324 break;
4325 }
4326 }
4327}
4328
4329
4330/**
4331 * Clear references to guest physical memory in a PAE / 32-bit page table.
4332 *
4333 * @param pPool The pool.
4334 * @param pPage The page.
4335 * @param pShwPT The shadow page table (mapping of the page).
4336 * @param pGstPT The guest page table (just a half one).
4337 */
4338DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4339{
4340 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4341 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4342 {
4343 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4344 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4345 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4346 {
4347 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4348 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4349 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4350 if (!pPage->cPresent)
4351 break;
4352 }
4353 }
4354}
4355
4356
4357/**
4358 * Clear references to guest physical memory in a PAE / PAE page table.
4359 *
4360 * @param pPool The pool.
4361 * @param pPage The page.
4362 * @param pShwPT The shadow page table (mapping of the page).
4363 * @param pGstPT The guest page table.
4364 */
4365DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4366{
4367 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4368 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4369 {
4370 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4371 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4372 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4373 {
4374 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4375 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4376 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4377 if (!pPage->cPresent)
4378 break;
4379 }
4380 }
4381}
4382
4383
4384/**
4385 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4386 *
4387 * @param pPool The pool.
4388 * @param pPage The page.
4389 * @param pShwPT The shadow page table (mapping of the page).
4390 */
4391DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4392{
4393 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4394 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4395 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4396 {
4397 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4398 if (pShwPT->a[i].n.u1Present)
4399 {
4400 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4401 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4402 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4403 if (!pPage->cPresent)
4404 break;
4405 }
4406 }
4407}
4408
4409
4410/**
4411 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4412 *
4413 * @param pPool The pool.
4414 * @param pPage The page.
4415 * @param pShwPT The shadow page table (mapping of the page).
4416 */
4417DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4418{
4419 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4420 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4421 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4422 {
4423 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4424 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4425 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4426 {
4427 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4428 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4429 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4430 if (!pPage->cPresent)
4431 break;
4432 }
4433 }
4434}
4435
4436
4437/**
4438 * Clear references to shadowed pages in an EPT page table.
4439 *
4440 * @param pPool The pool.
4441 * @param pPage The page.
4442 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4443 */
4444DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4445{
4446 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4447 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4448 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4449 {
4450 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4451 if (pShwPT->a[i].n.u1Present)
4452 {
4453 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4454 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4455 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4456 if (!pPage->cPresent)
4457 break;
4458 }
4459 }
4460}
4461
4462
4463/**
4464 * Clear references to shadowed pages in a 32 bits page directory.
4465 *
4466 * @param pPool The pool.
4467 * @param pPage The page.
4468 * @param pShwPD The shadow page directory (mapping of the page).
4469 */
4470DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4471{
4472 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4473 {
4474 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4475 if ( pShwPD->a[i].n.u1Present
4476 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4477 )
4478 {
4479 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4480 if (pSubPage)
4481 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4482 else
4483 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4484 }
4485 }
4486}
4487
4488
4489/**
4490 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4491 *
4492 * @param pPool The pool.
4493 * @param pPage The page.
4494 * @param pShwPD The shadow page directory (mapping of the page).
4495 */
4496DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4497{
4498 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4499 {
4500 if ( pShwPD->a[i].n.u1Present
4501 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4502 {
4503#ifdef PGM_WITH_LARGE_PAGES
4504 if (pShwPD->a[i].b.u1Size)
4505 {
4506 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4507 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4508 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4509 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4510 i);
4511 }
4512 else
4513#endif
4514 {
4515 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4516 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4517 if (pSubPage)
4518 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4519 else
4520 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4521 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4522 }
4523 }
4524 }
4525}
4526
4527
4528/**
4529 * Clear references to shadowed pages in a PAE page directory pointer table.
4530 *
4531 * @param pPool The pool.
4532 * @param pPage The page.
4533 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4534 */
4535DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4536{
4537 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4538 {
4539 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4540 if ( pShwPDPT->a[i].n.u1Present
4541 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4542 )
4543 {
4544 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4545 if (pSubPage)
4546 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4547 else
4548 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4549 }
4550 }
4551}
4552
4553
4554/**
4555 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4556 *
4557 * @param pPool The pool.
4558 * @param pPage The page.
4559 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4560 */
4561DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4562{
4563 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4564 {
4565 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4566 if (pShwPDPT->a[i].n.u1Present)
4567 {
4568 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4569 if (pSubPage)
4570 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4571 else
4572 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4573 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4574 }
4575 }
4576}
4577
4578
4579/**
4580 * Clear references to shadowed pages in a 64-bit level 4 page table.
4581 *
4582 * @param pPool The pool.
4583 * @param pPage The page.
4584 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4585 */
4586DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4587{
4588 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4589 {
4590 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4591 if (pShwPML4->a[i].n.u1Present)
4592 {
4593 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4594 if (pSubPage)
4595 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4596 else
4597 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4598 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4599 }
4600 }
4601}
4602
4603
4604/**
4605 * Clear references to shadowed pages in an EPT page directory.
4606 *
4607 * @param pPool The pool.
4608 * @param pPage The page.
4609 * @param pShwPD The shadow page directory (mapping of the page).
4610 */
4611DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4612{
4613 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4614 {
4615 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4616 if (pShwPD->a[i].n.u1Present)
4617 {
4618#ifdef PGM_WITH_LARGE_PAGES
4619 if (pShwPD->a[i].b.u1Size)
4620 {
4621 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4622 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4623 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4624 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4625 i);
4626 }
4627 else
4628#endif
4629 {
4630 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4631 if (pSubPage)
4632 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4633 else
4634 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4635 }
4636 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4637 }
4638 }
4639}
4640
4641
4642/**
4643 * Clear references to shadowed pages in an EPT page directory pointer table.
4644 *
4645 * @param pPool The pool.
4646 * @param pPage The page.
4647 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4648 */
4649DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4650{
4651 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4652 {
4653 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4654 if (pShwPDPT->a[i].n.u1Present)
4655 {
4656 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4657 if (pSubPage)
4658 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4659 else
4660 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4661 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4662 }
4663 }
4664}
4665
4666
4667/**
4668 * Clears all references made by this page.
4669 *
4670 * This includes other shadow pages and GC physical addresses.
4671 *
4672 * @param pPool The pool.
4673 * @param pPage The page.
4674 */
4675static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4676{
4677 /*
4678 * Map the shadow page and take action according to the page kind.
4679 */
4680 PVM pVM = pPool->CTX_SUFF(pVM);
4681 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4682 switch (pPage->enmKind)
4683 {
4684 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4685 {
4686 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4687 void *pvGst;
4688 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4689 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4690 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4691 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4692 break;
4693 }
4694
4695 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4696 {
4697 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4698 void *pvGst;
4699 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4700 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4701 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4702 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4703 break;
4704 }
4705
4706 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4707 {
4708 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4709 void *pvGst;
4710 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4711 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4712 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4713 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4714 break;
4715 }
4716
4717 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4718 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4719 {
4720 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4721 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4722 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4723 break;
4724 }
4725
4726 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4727 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4728 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4729 {
4730 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4731 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4732 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4733 break;
4734 }
4735
4736 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4737 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4738 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4739 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4740 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4741 case PGMPOOLKIND_PAE_PD_PHYS:
4742 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4743 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4744 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4745 break;
4746
4747 case PGMPOOLKIND_32BIT_PD_PHYS:
4748 case PGMPOOLKIND_32BIT_PD:
4749 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4750 break;
4751
4752 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4753 case PGMPOOLKIND_PAE_PDPT:
4754 case PGMPOOLKIND_PAE_PDPT_PHYS:
4755 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4756 break;
4757
4758 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4759 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4760 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4761 break;
4762
4763 case PGMPOOLKIND_64BIT_PML4:
4764 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4765 break;
4766
4767 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4768 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4769 break;
4770
4771 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4772 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4773 break;
4774
4775 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4776 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4777 break;
4778
4779 default:
4780 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4781 }
4782
4783 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
4784 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4785 ASMMemZeroPage(pvShw);
4786 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4787 pPage->fZeroed = true;
4788 Assert(!pPage->cPresent);
4789 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4790}
4791
4792
4793/**
4794 * Flushes a pool page.
4795 *
4796 * This moves the page to the free list after removing all user references to it.
4797 *
4798 * @returns VBox status code.
4799 * @retval VINF_SUCCESS on success.
4800 * @param pPool The pool.
4801 * @param HCPhys The HC physical address of the shadow page.
4802 * @param fFlush Flush the TLBS when required (should only be false in very specific use cases!!)
4803 */
4804int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4805{
4806 PVM pVM = pPool->CTX_SUFF(pVM);
4807 bool fFlushRequired = false;
4808
4809 int rc = VINF_SUCCESS;
4810 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4811 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4812 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4813
4814 /*
4815 * Reject any attempts at flushing any of the special root pages (shall
4816 * not happen).
4817 */
4818 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4819 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4820 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4821 VINF_SUCCESS);
4822
4823 pgmLock(pVM);
4824
4825 /*
4826 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4827 */
4828 if (pgmPoolIsPageLocked(pPage))
4829 {
4830 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4831 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4832 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4833 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4834 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4835 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4836 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4837 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4838 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4839 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4840 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4841 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4842 pgmUnlock(pVM);
4843 return VINF_SUCCESS;
4844 }
4845
4846#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4847 /* Start a subset so we won't run out of mapping space. */
4848 PVMCPU pVCpu = VMMGetCpu(pVM);
4849 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4850#endif
4851
4852 /*
4853 * Mark the page as being in need of an ASMMemZeroPage().
4854 */
4855 pPage->fZeroed = false;
4856
4857#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4858 if (pPage->fDirty)
4859 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4860#endif
4861
4862 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4863 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4864 fFlushRequired = true;
4865
4866 /*
4867 * Clear the page.
4868 */
4869 pgmPoolTrackClearPageUsers(pPool, pPage);
4870 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4871 pgmPoolTrackDeref(pPool, pPage);
4872 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4873
4874 /*
4875 * Flush it from the cache.
4876 */
4877 pgmPoolCacheFlushPage(pPool, pPage);
4878
4879#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4880 /* Heavy stuff done. */
4881 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4882#endif
4883
4884 /*
4885 * Deregistering the monitoring.
4886 */
4887 if (pPage->fMonitored)
4888 rc = pgmPoolMonitorFlush(pPool, pPage);
4889
4890 /*
4891 * Free the page.
4892 */
4893 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4894 pPage->iNext = pPool->iFreeHead;
4895 pPool->iFreeHead = pPage->idx;
4896 pPage->enmKind = PGMPOOLKIND_FREE;
4897 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4898 pPage->GCPhys = NIL_RTGCPHYS;
4899 pPage->fReusedFlushPending = false;
4900
4901 pPool->cUsedPages--;
4902
4903 /* Flush the TLBs of all VCPUs if required. */
4904 if ( fFlushRequired
4905 && fFlush)
4906 {
4907 PGM_INVL_ALL_VCPU_TLBS(pVM);
4908 }
4909
4910 pgmUnlock(pVM);
4911 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4912 return rc;
4913}
4914
4915
4916/**
4917 * Frees a usage of a pool page.
4918 *
4919 * The caller is responsible to updating the user table so that it no longer
4920 * references the shadow page.
4921 *
4922 * @param pPool The pool.
4923 * @param HCPhys The HC physical address of the shadow page.
4924 * @param iUser The shadow page pool index of the user table.
4925 * NIL_PGMPOOL_IDX for root pages.
4926 * @param iUserTable The index into the user table (shadowed). Ignored if
4927 * root page.
4928 */
4929void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4930{
4931 PVM pVM = pPool->CTX_SUFF(pVM);
4932
4933 STAM_PROFILE_START(&pPool->StatFree, a);
4934 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4935 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4936 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
4937
4938 pgmLock(pVM);
4939 if (iUser != NIL_PGMPOOL_IDX)
4940 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4941 if (!pPage->fCached)
4942 pgmPoolFlushPage(pPool, pPage);
4943 pgmUnlock(pVM);
4944 STAM_PROFILE_STOP(&pPool->StatFree, a);
4945}
4946
4947
4948/**
4949 * Makes one or more free page free.
4950 *
4951 * @returns VBox status code.
4952 * @retval VINF_SUCCESS on success.
4953 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4954 *
4955 * @param pPool The pool.
4956 * @param enmKind Page table kind
4957 * @param iUser The user of the page.
4958 */
4959static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4960{
4961 PVM pVM = pPool->CTX_SUFF(pVM);
4962 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
4963 NOREF(enmKind);
4964
4965 /*
4966 * If the pool isn't full grown yet, expand it.
4967 */
4968 if ( pPool->cCurPages < pPool->cMaxPages
4969#if defined(IN_RC)
4970 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4971 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4972 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4973#endif
4974 )
4975 {
4976 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4977#ifdef IN_RING3
4978 int rc = PGMR3PoolGrow(pVM);
4979#else
4980 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4981#endif
4982 if (RT_FAILURE(rc))
4983 return rc;
4984 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4985 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4986 return VINF_SUCCESS;
4987 }
4988
4989 /*
4990 * Free one cached page.
4991 */
4992 return pgmPoolCacheFreeOne(pPool, iUser);
4993}
4994
4995
4996/**
4997 * Allocates a page from the pool.
4998 *
4999 * This page may actually be a cached page and not in need of any processing
5000 * on the callers part.
5001 *
5002 * @returns VBox status code.
5003 * @retval VINF_SUCCESS if a NEW page was allocated.
5004 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5005 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5006 *
5007 * @param pVM Pointer to the VM.
5008 * @param GCPhys The GC physical address of the page we're gonna shadow.
5009 * For 4MB and 2MB PD entries, it's the first address the
5010 * shadow PT is covering.
5011 * @param enmKind The kind of mapping.
5012 * @param enmAccess Access type for the mapping (only relevant for big pages)
5013 * @param fA20Enabled Whether the A20 gate is enabled or not.
5014 * @param iUser The shadow page pool index of the user table. Root
5015 * pages should pass NIL_PGMPOOL_IDX.
5016 * @param iUserTable The index into the user table (shadowed). Ignored for
5017 * root pages (iUser == NIL_PGMPOOL_IDX).
5018 * @param fLockPage Lock the page
5019 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5020 */
5021int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5022 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5023{
5024 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5025 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5026 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5027 *ppPage = NULL;
5028 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5029 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5030 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5031
5032 pgmLock(pVM);
5033
5034 if (pPool->fCacheEnabled)
5035 {
5036 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5037 if (RT_SUCCESS(rc2))
5038 {
5039 if (fLockPage)
5040 pgmPoolLockPage(pPool, *ppPage);
5041 pgmUnlock(pVM);
5042 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5043 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5044 return rc2;
5045 }
5046 }
5047
5048 /*
5049 * Allocate a new one.
5050 */
5051 int rc = VINF_SUCCESS;
5052 uint16_t iNew = pPool->iFreeHead;
5053 if (iNew == NIL_PGMPOOL_IDX)
5054 {
5055 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5056 if (RT_FAILURE(rc))
5057 {
5058 pgmUnlock(pVM);
5059 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5060 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5061 return rc;
5062 }
5063 iNew = pPool->iFreeHead;
5064 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5065 }
5066
5067 /* unlink the free head */
5068 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5069 pPool->iFreeHead = pPage->iNext;
5070 pPage->iNext = NIL_PGMPOOL_IDX;
5071
5072 /*
5073 * Initialize it.
5074 */
5075 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5076 pPage->enmKind = enmKind;
5077 pPage->enmAccess = enmAccess;
5078 pPage->GCPhys = GCPhys;
5079 pPage->fA20Enabled = fA20Enabled;
5080 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5081 pPage->fMonitored = false;
5082 pPage->fCached = false;
5083 pPage->fDirty = false;
5084 pPage->fReusedFlushPending = false;
5085 pPage->cModifications = 0;
5086 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5087 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5088 pPage->cPresent = 0;
5089 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5090 pPage->idxDirtyEntry = 0;
5091 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5092 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5093 pPage->cLastAccessHandler = 0;
5094 pPage->cLocked = 0;
5095# ifdef VBOX_STRICT
5096 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5097# endif
5098
5099 /*
5100 * Insert into the tracking and cache. If this fails, free the page.
5101 */
5102 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5103 if (RT_FAILURE(rc3))
5104 {
5105 pPool->cUsedPages--;
5106 pPage->enmKind = PGMPOOLKIND_FREE;
5107 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5108 pPage->GCPhys = NIL_RTGCPHYS;
5109 pPage->iNext = pPool->iFreeHead;
5110 pPool->iFreeHead = pPage->idx;
5111 pgmUnlock(pVM);
5112 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5113 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5114 return rc3;
5115 }
5116
5117 /*
5118 * Commit the allocation, clear the page and return.
5119 */
5120#ifdef VBOX_WITH_STATISTICS
5121 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5122 pPool->cUsedPagesHigh = pPool->cUsedPages;
5123#endif
5124
5125 if (!pPage->fZeroed)
5126 {
5127 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5128 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5129 ASMMemZeroPage(pv);
5130 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5131 }
5132
5133 *ppPage = pPage;
5134 if (fLockPage)
5135 pgmPoolLockPage(pPool, pPage);
5136 pgmUnlock(pVM);
5137 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5138 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5139 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5140 return rc;
5141}
5142
5143
5144/**
5145 * Frees a usage of a pool page.
5146 *
5147 * @param pVM Pointer to the VM.
5148 * @param HCPhys The HC physical address of the shadow page.
5149 * @param iUser The shadow page pool index of the user table.
5150 * NIL_PGMPOOL_IDX if root page.
5151 * @param iUserTable The index into the user table (shadowed). Ignored if
5152 * root page.
5153 */
5154void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5155{
5156 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5157 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5158 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5159}
5160
5161
5162/**
5163 * Internal worker for finding a 'in-use' shadow page give by it's physical address.
5164 *
5165 * @returns Pointer to the shadow page structure.
5166 * @param pPool The pool.
5167 * @param HCPhys The HC physical address of the shadow page.
5168 */
5169PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5170{
5171 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5172
5173 /*
5174 * Look up the page.
5175 */
5176 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5177
5178 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5179 return pPage;
5180}
5181
5182
5183/**
5184 * Internal worker for finding a page for debugging purposes, no assertions.
5185 *
5186 * @returns Pointer to the shadow page structure. NULL on if not found.
5187 * @param pPool The pool.
5188 * @param HCPhys The HC physical address of the shadow page.
5189 */
5190PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5191{
5192 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5193 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5194}
5195
5196#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5197
5198/**
5199 * Flush the specified page if present
5200 *
5201 * @param pVM Pointer to the VM.
5202 * @param GCPhys Guest physical address of the page to flush
5203 */
5204void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5205{
5206 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5207
5208 VM_ASSERT_EMT(pVM);
5209
5210 /*
5211 * Look up the GCPhys in the hash.
5212 */
5213 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5214 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5215 if (i == NIL_PGMPOOL_IDX)
5216 return;
5217
5218 do
5219 {
5220 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5221 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5222 {
5223 switch (pPage->enmKind)
5224 {
5225 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5226 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5227 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5228 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5229 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5230 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5231 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5232 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5233 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5234 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5235 case PGMPOOLKIND_64BIT_PML4:
5236 case PGMPOOLKIND_32BIT_PD:
5237 case PGMPOOLKIND_PAE_PDPT:
5238 {
5239 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5240#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5241 if (pPage->fDirty)
5242 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5243 else
5244#endif
5245 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5246 Assert(!pgmPoolIsPageLocked(pPage));
5247 pgmPoolMonitorChainFlush(pPool, pPage);
5248 return;
5249 }
5250
5251 /* ignore, no monitoring. */
5252 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5253 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5254 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5255 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5256 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5257 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5258 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5259 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5260 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5261 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5262 case PGMPOOLKIND_ROOT_NESTED:
5263 case PGMPOOLKIND_PAE_PD_PHYS:
5264 case PGMPOOLKIND_PAE_PDPT_PHYS:
5265 case PGMPOOLKIND_32BIT_PD_PHYS:
5266 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5267 break;
5268
5269 default:
5270 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5271 }
5272 }
5273
5274 /* next */
5275 i = pPage->iNext;
5276 } while (i != NIL_PGMPOOL_IDX);
5277 return;
5278}
5279
5280#endif /* IN_RING3 */
5281#ifdef IN_RING3
5282
5283/**
5284 * Reset CPU on hot plugging.
5285 *
5286 * @param pVM Pointer to the VM.
5287 * @param pVCpu The virtual CPU.
5288 */
5289void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5290{
5291 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5292
5293 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5294 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5295 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5296}
5297
5298
5299/**
5300 * Flushes the entire cache.
5301 *
5302 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5303 * this and execute this CR3 flush.
5304 *
5305 * @param pPool The pool.
5306 */
5307void pgmR3PoolReset(PVM pVM)
5308{
5309 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5310
5311 PGM_LOCK_ASSERT_OWNER(pVM);
5312 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5313 LogFlow(("pgmR3PoolReset:\n"));
5314
5315 /*
5316 * If there are no pages in the pool, there is nothing to do.
5317 */
5318 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5319 {
5320 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5321 return;
5322 }
5323
5324 /*
5325 * Exit the shadow mode since we're going to clear everything,
5326 * including the root page.
5327 */
5328 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5329 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5330
5331 /*
5332 * Nuke the free list and reinsert all pages into it.
5333 */
5334 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5335 {
5336 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5337
5338 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5339 if (pPage->fMonitored)
5340 pgmPoolMonitorFlush(pPool, pPage);
5341 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5342 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5343 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5344 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5345 pPage->cModifications = 0;
5346 pPage->GCPhys = NIL_RTGCPHYS;
5347 pPage->enmKind = PGMPOOLKIND_FREE;
5348 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5349 Assert(pPage->idx == i);
5350 pPage->iNext = i + 1;
5351 pPage->fA20Enabled = true;
5352 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5353 pPage->fSeenNonGlobal = false;
5354 pPage->fMonitored = false;
5355 pPage->fDirty = false;
5356 pPage->fCached = false;
5357 pPage->fReusedFlushPending = false;
5358 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5359 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5360 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5361 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5362 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5363 pPage->cLastAccessHandler = 0;
5364 pPage->cLocked = 0;
5365#ifdef VBOX_STRICT
5366 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5367#endif
5368 }
5369 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5370 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5371 pPool->cUsedPages = 0;
5372
5373 /*
5374 * Zap and reinitialize the user records.
5375 */
5376 pPool->cPresent = 0;
5377 pPool->iUserFreeHead = 0;
5378 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5379 const unsigned cMaxUsers = pPool->cMaxUsers;
5380 for (unsigned i = 0; i < cMaxUsers; i++)
5381 {
5382 paUsers[i].iNext = i + 1;
5383 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5384 paUsers[i].iUserTable = 0xfffffffe;
5385 }
5386 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5387
5388 /*
5389 * Clear all the GCPhys links and rebuild the phys ext free list.
5390 */
5391 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5392 pRam;
5393 pRam = pRam->CTX_SUFF(pNext))
5394 {
5395 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5396 while (iPage-- > 0)
5397 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5398 }
5399
5400 pPool->iPhysExtFreeHead = 0;
5401 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5402 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5403 for (unsigned i = 0; i < cMaxPhysExts; i++)
5404 {
5405 paPhysExts[i].iNext = i + 1;
5406 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5407 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5408 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5409 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5410 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5411 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5412 }
5413 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5414
5415 /*
5416 * Just zap the modified list.
5417 */
5418 pPool->cModifiedPages = 0;
5419 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5420
5421 /*
5422 * Clear the GCPhys hash and the age list.
5423 */
5424 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5425 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5426 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5427 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5428
5429#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5430 /* Clear all dirty pages. */
5431 pPool->idxFreeDirtyPage = 0;
5432 pPool->cDirtyPages = 0;
5433 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5434 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5435#endif
5436
5437 /*
5438 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5439 */
5440 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5441 {
5442 /*
5443 * Re-enter the shadowing mode and assert Sync CR3 FF.
5444 */
5445 PVMCPU pVCpu = &pVM->aCpus[i];
5446 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5447 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5448 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5449 }
5450
5451 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5452}
5453
5454#endif /* IN_RING3 */
5455
5456#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5457/**
5458 * Stringifies a PGMPOOLKIND value.
5459 */
5460static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5461{
5462 switch ((PGMPOOLKIND)enmKind)
5463 {
5464 case PGMPOOLKIND_INVALID:
5465 return "PGMPOOLKIND_INVALID";
5466 case PGMPOOLKIND_FREE:
5467 return "PGMPOOLKIND_FREE";
5468 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5469 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5470 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5471 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5472 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5473 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5474 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5475 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5476 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5477 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5478 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5479 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5480 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5481 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5482 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5483 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5484 case PGMPOOLKIND_32BIT_PD:
5485 return "PGMPOOLKIND_32BIT_PD";
5486 case PGMPOOLKIND_32BIT_PD_PHYS:
5487 return "PGMPOOLKIND_32BIT_PD_PHYS";
5488 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5489 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5490 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5491 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5492 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5493 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5494 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5495 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5496 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5497 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5498 case PGMPOOLKIND_PAE_PD_PHYS:
5499 return "PGMPOOLKIND_PAE_PD_PHYS";
5500 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5501 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5502 case PGMPOOLKIND_PAE_PDPT:
5503 return "PGMPOOLKIND_PAE_PDPT";
5504 case PGMPOOLKIND_PAE_PDPT_PHYS:
5505 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5506 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5507 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5508 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5509 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5510 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5511 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5512 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5513 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5514 case PGMPOOLKIND_64BIT_PML4:
5515 return "PGMPOOLKIND_64BIT_PML4";
5516 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5517 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5518 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5519 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5520 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5521 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5522 case PGMPOOLKIND_ROOT_NESTED:
5523 return "PGMPOOLKIND_ROOT_NESTED";
5524 }
5525 return "Unknown kind!";
5526}
5527#endif /* LOG_ENABLED || VBOX_STRICT */
5528
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette