VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 42536

Last change on this file since 42536 was 42188, checked in by vboxsync, 13 years ago

VMM: Changed a few ifndef IN_RING0 to ifndef VBOX_WITH_RAW_MODE_NOT_R0.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 210.1 KB
Line 
1/* $Id: PGMAllPool.cpp 42188 2012-07-17 13:50:51Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hwacc_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
51static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#ifndef IN_RING3
53DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
54#endif
55#ifdef LOG_ENABLED
56static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
57#endif
58#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
59static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
60#endif
61
62int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
63PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
64void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
65void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
66
67RT_C_DECLS_END
68
69
70/**
71 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
72 *
73 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
74 * @param enmKind The page kind.
75 */
76DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
77{
78 switch (enmKind)
79 {
80 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
83 return true;
84 default:
85 return false;
86 }
87}
88
89
90/**
91 * Flushes a chain of pages sharing the same access monitor.
92 *
93 * @returns VBox status code suitable for scheduling.
94 * @param pPool The pool.
95 * @param pPage A page in the chain.
96 * @todo VBOXSTRICTRC
97 */
98int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
99{
100 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
101
102 /*
103 * Find the list head.
104 */
105 uint16_t idx = pPage->idx;
106 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
109 {
110 idx = pPage->iMonitoredPrev;
111 Assert(idx != pPage->idx);
112 pPage = &pPool->aPages[idx];
113 }
114 }
115
116 /*
117 * Iterate the list flushing each shadow page.
118 */
119 int rc = VINF_SUCCESS;
120 for (;;)
121 {
122 idx = pPage->iMonitoredNext;
123 Assert(idx != pPage->idx);
124 if (pPage->idx >= PGMPOOL_IDX_FIRST)
125 {
126 int rc2 = pgmPoolFlushPage(pPool, pPage);
127 AssertRC(rc2);
128 }
129 /* next */
130 if (idx == NIL_PGMPOOL_IDX)
131 break;
132 pPage = &pPool->aPages[idx];
133 }
134 return rc;
135}
136
137
138/**
139 * Wrapper for getting the current context pointer to the entry being modified.
140 *
141 * @returns VBox status code suitable for scheduling.
142 * @param pVM Pointer to the VM.
143 * @param pvDst Destination address
144 * @param pvSrc Source guest virtual address.
145 * @param GCPhysSrc The source guest physical address.
146 * @param cb Size of data to read
147 */
148DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc,
149 RTGCPHYS GCPhysSrc, size_t cb)
150{
151#if defined(IN_RING3)
152 NOREF(pVM); NOREF(GCPhysSrc);
153 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
154 return VINF_SUCCESS;
155#else
156 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
157 NOREF(pvSrc);
158 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
159#endif
160}
161
162
163/**
164 * Process shadow entries before they are changed by the guest.
165 *
166 * For PT entries we will clear them. For PD entries, we'll simply check
167 * for mapping conflicts and set the SyncCR3 FF if found.
168 *
169 * @param pVCpu Pointer to the VMCPU.
170 * @param pPool The pool.
171 * @param pPage The head page.
172 * @param GCPhysFault The guest physical fault address.
173 * @param uAddress In R0 and GC this is the guest context fault address (flat).
174 * In R3 this is the host context 'fault' address.
175 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
176 */
177void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
178 CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
179{
180 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
181 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
182 PVM pVM = pPool->CTX_SUFF(pVM);
183 NOREF(pVCpu);
184
185 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
186
187 for (;;)
188 {
189 union
190 {
191 void *pv;
192 PX86PT pPT;
193 PPGMSHWPTPAE pPTPae;
194 PX86PD pPD;
195 PX86PDPAE pPDPae;
196 PX86PDPT pPDPT;
197 PX86PML4 pPML4;
198 } uShw;
199
200 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
201
202 uShw.pv = NULL;
203 switch (pPage->enmKind)
204 {
205 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
206 {
207 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
208 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
209 const unsigned iShw = off / sizeof(X86PTE);
210 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
211 if (uShw.pPT->a[iShw].n.u1Present)
212 {
213 X86PTE GstPte;
214
215 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
216 AssertRC(rc);
217 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
218 pgmPoolTracDerefGCPhysHint(pPool, pPage,
219 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
220 GstPte.u & X86_PTE_PG_MASK,
221 iShw);
222 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
223 }
224 break;
225 }
226
227 /* page/2 sized */
228 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
229 {
230 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
231 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
232 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
233 {
234 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
235 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
236 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
237 {
238 X86PTE GstPte;
239 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
240 AssertRC(rc);
241
242 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
243 pgmPoolTracDerefGCPhysHint(pPool, pPage,
244 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
245 GstPte.u & X86_PTE_PG_MASK,
246 iShw);
247 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
248 }
249 }
250 break;
251 }
252
253 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
254 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
255 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
256 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
257 {
258 unsigned iGst = off / sizeof(X86PDE);
259 unsigned iShwPdpt = iGst / 256;
260 unsigned iShw = (iGst % 256) * 2;
261 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
262
263 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
264 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
265 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
266 {
267 for (unsigned i = 0; i < 2; i++)
268 {
269# ifdef VBOX_WITH_RAW_MODE_NOT_R0
270 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
271 {
272 Assert(pgmMapAreMappingsEnabled(pVM));
273 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
274 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
275 break;
276 }
277# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
278 if (uShw.pPDPae->a[iShw+i].n.u1Present)
279 {
280 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
281 pgmPoolFree(pVM,
282 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
283 pPage->idx,
284 iShw + i);
285 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
286 }
287
288 /* paranoia / a bit assumptive. */
289 if ( (off & 3)
290 && (off & 3) + cbWrite > 4)
291 {
292 const unsigned iShw2 = iShw + 2 + i;
293 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
294 {
295# ifdef VBOX_WITH_RAW_MODE_NOT_R0
296 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
297 {
298 Assert(pgmMapAreMappingsEnabled(pVM));
299 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
300 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
301 break;
302 }
303# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
304 if (uShw.pPDPae->a[iShw2].n.u1Present)
305 {
306 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
307 pgmPoolFree(pVM,
308 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
309 pPage->idx,
310 iShw2);
311 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
312 }
313 }
314 }
315 }
316 }
317 break;
318 }
319
320 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
321 {
322 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
323 const unsigned iShw = off / sizeof(X86PTEPAE);
324 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
325 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
326 {
327 X86PTEPAE GstPte;
328 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
329 AssertRC(rc);
330
331 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
332 pgmPoolTracDerefGCPhysHint(pPool, pPage,
333 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
334 GstPte.u & X86_PTE_PAE_PG_MASK,
335 iShw);
336 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
337 }
338
339 /* paranoia / a bit assumptive. */
340 if ( (off & 7)
341 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
342 {
343 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
344 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
345
346 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
347 {
348 X86PTEPAE GstPte;
349# ifdef IN_RING3
350 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
351# else
352 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
353# endif
354 AssertRC(rc);
355 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
356 pgmPoolTracDerefGCPhysHint(pPool, pPage,
357 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
358 GstPte.u & X86_PTE_PAE_PG_MASK,
359 iShw2);
360 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
361 }
362 }
363 break;
364 }
365
366 case PGMPOOLKIND_32BIT_PD:
367 {
368 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
369 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
370
371 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
372 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
373# ifdef VBOX_WITH_RAW_MODE_NOT_R0
374 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
375 {
376 Assert(pgmMapAreMappingsEnabled(pVM));
377 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
378 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
379 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
380 break;
381 }
382 else
383# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
384 {
385 if (uShw.pPD->a[iShw].n.u1Present)
386 {
387 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
388 pgmPoolFree(pVM,
389 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
390 pPage->idx,
391 iShw);
392 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
393 }
394 }
395 /* paranoia / a bit assumptive. */
396 if ( (off & 3)
397 && (off & 3) + cbWrite > sizeof(X86PTE))
398 {
399 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
400 if ( iShw2 != iShw
401 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
402 {
403# ifdef VBOX_WITH_RAW_MODE_NOT_R0
404 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
405 {
406 Assert(pgmMapAreMappingsEnabled(pVM));
407 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
408 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
409 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
410 break;
411 }
412# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
413 if (uShw.pPD->a[iShw2].n.u1Present)
414 {
415 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
416 pgmPoolFree(pVM,
417 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
418 pPage->idx,
419 iShw2);
420 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
421 }
422 }
423 }
424#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
425 if ( uShw.pPD->a[iShw].n.u1Present
426 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
427 {
428 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
429# ifdef IN_RC /* TLB load - we're pushing things a bit... */
430 ASMProbeReadByte(pvAddress);
431# endif
432 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
433 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
434 }
435#endif
436 break;
437 }
438
439 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
440 {
441 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
442 const unsigned iShw = off / sizeof(X86PDEPAE);
443 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
444#ifdef VBOX_WITH_RAW_MODE_NOT_R0
445 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
446 {
447 Assert(pgmMapAreMappingsEnabled(pVM));
448 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
449 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
450 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
451 break;
452 }
453#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
454 /*
455 * Causes trouble when the guest uses a PDE to refer to the whole page table level
456 * structure. (Invalidate here; faults later on when it tries to change the page
457 * table entries -> recheck; probably only applies to the RC case.)
458 */
459#ifdef VBOX_WITH_RAW_MODE_NOT_R0
460 else
461#endif
462 {
463 if (uShw.pPDPae->a[iShw].n.u1Present)
464 {
465 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
466 pgmPoolFree(pVM,
467 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
468 pPage->idx,
469 iShw);
470 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
471 }
472 }
473 /* paranoia / a bit assumptive. */
474 if ( (off & 7)
475 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
476 {
477 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
478 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
479
480#ifdef VBOX_WITH_RAW_MODE_NOT_R0
481 if ( iShw2 != iShw
482 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
483 {
484 Assert(pgmMapAreMappingsEnabled(pVM));
485 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
486 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
487 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
488 break;
489 }
490 else
491#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
492 if (uShw.pPDPae->a[iShw2].n.u1Present)
493 {
494 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
495 pgmPoolFree(pVM,
496 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
497 pPage->idx,
498 iShw2);
499 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
500 }
501 }
502 break;
503 }
504
505 case PGMPOOLKIND_PAE_PDPT:
506 {
507 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
508 /*
509 * Hopefully this doesn't happen very often:
510 * - touching unused parts of the page
511 * - messing with the bits of pd pointers without changing the physical address
512 */
513 /* PDPT roots are not page aligned; 32 byte only! */
514 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
515
516 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
517 const unsigned iShw = offPdpt / sizeof(X86PDPE);
518 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
519 {
520# ifdef VBOX_WITH_RAW_MODE_NOT_R0
521 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
522 {
523 Assert(pgmMapAreMappingsEnabled(pVM));
524 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
525 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
526 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
527 break;
528 }
529 else
530# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
531 if (uShw.pPDPT->a[iShw].n.u1Present)
532 {
533 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
534 pgmPoolFree(pVM,
535 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
536 pPage->idx,
537 iShw);
538 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
539 }
540
541 /* paranoia / a bit assumptive. */
542 if ( (offPdpt & 7)
543 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
544 {
545 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
546 if ( iShw2 != iShw
547 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
548 {
549# ifdef VBOX_WITH_RAW_MODE_NOT_R0
550 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
551 {
552 Assert(pgmMapAreMappingsEnabled(pVM));
553 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
554 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
555 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
556 break;
557 }
558 else
559# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
560 if (uShw.pPDPT->a[iShw2].n.u1Present)
561 {
562 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
563 pgmPoolFree(pVM,
564 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
565 pPage->idx,
566 iShw2);
567 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
568 }
569 }
570 }
571 }
572 break;
573 }
574
575#ifndef IN_RC
576 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
577 {
578 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
579 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
580 const unsigned iShw = off / sizeof(X86PDEPAE);
581 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
582 if (uShw.pPDPae->a[iShw].n.u1Present)
583 {
584 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
585 pgmPoolFree(pVM,
586 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
587 pPage->idx,
588 iShw);
589 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
590 }
591 /* paranoia / a bit assumptive. */
592 if ( (off & 7)
593 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
594 {
595 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
596 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
597
598 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
599 if (uShw.pPDPae->a[iShw2].n.u1Present)
600 {
601 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
602 pgmPoolFree(pVM,
603 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
604 pPage->idx,
605 iShw2);
606 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
607 }
608 }
609 break;
610 }
611
612 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
613 {
614 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
615 /*
616 * Hopefully this doesn't happen very often:
617 * - messing with the bits of pd pointers without changing the physical address
618 */
619 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
620 const unsigned iShw = off / sizeof(X86PDPE);
621 if (uShw.pPDPT->a[iShw].n.u1Present)
622 {
623 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
624 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
625 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
626 }
627 /* paranoia / a bit assumptive. */
628 if ( (off & 7)
629 && (off & 7) + cbWrite > sizeof(X86PDPE))
630 {
631 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
632 if (uShw.pPDPT->a[iShw2].n.u1Present)
633 {
634 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
635 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
636 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
637 }
638 }
639 break;
640 }
641
642 case PGMPOOLKIND_64BIT_PML4:
643 {
644 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
645 /*
646 * Hopefully this doesn't happen very often:
647 * - messing with the bits of pd pointers without changing the physical address
648 */
649 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
650 const unsigned iShw = off / sizeof(X86PDPE);
651 if (uShw.pPML4->a[iShw].n.u1Present)
652 {
653 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
654 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
655 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
656 }
657 /* paranoia / a bit assumptive. */
658 if ( (off & 7)
659 && (off & 7) + cbWrite > sizeof(X86PDPE))
660 {
661 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
662 if (uShw.pPML4->a[iShw2].n.u1Present)
663 {
664 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
665 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
666 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
667 }
668 }
669 break;
670 }
671#endif /* IN_RING0 */
672
673 default:
674 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
675 }
676 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
677
678 /* next */
679 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
680 return;
681 pPage = &pPool->aPages[pPage->iMonitoredNext];
682 }
683}
684
685# ifndef IN_RING3
686
687/**
688 * Checks if a access could be a fork operation in progress.
689 *
690 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
691 *
692 * @returns true if it's likely that we're forking, otherwise false.
693 * @param pPool The pool.
694 * @param pDis The disassembled instruction.
695 * @param offFault The access offset.
696 */
697DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
698{
699 /*
700 * i386 linux is using btr to clear X86_PTE_RW.
701 * The functions involved are (2.6.16 source inspection):
702 * clear_bit
703 * ptep_set_wrprotect
704 * copy_one_pte
705 * copy_pte_range
706 * copy_pmd_range
707 * copy_pud_range
708 * copy_page_range
709 * dup_mmap
710 * dup_mm
711 * copy_mm
712 * copy_process
713 * do_fork
714 */
715 if ( pDis->pCurInstr->uOpcode == OP_BTR
716 && !(offFault & 4)
717 /** @todo Validate that the bit index is X86_PTE_RW. */
718 )
719 {
720 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
721 return true;
722 }
723 return false;
724}
725
726
727/**
728 * Determine whether the page is likely to have been reused.
729 *
730 * @returns true if we consider the page as being reused for a different purpose.
731 * @returns false if we consider it to still be a paging page.
732 * @param pVM Pointer to the VM.
733 * @param pVCpu Pointer to the VMCPU.
734 * @param pRegFrame Trap register frame.
735 * @param pDis The disassembly info for the faulting instruction.
736 * @param pvFault The fault address.
737 *
738 * @remark The REP prefix check is left to the caller because of STOSD/W.
739 */
740DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
741{
742#ifndef IN_RC
743 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
744 if ( HWACCMHasPendingIrq(pVM)
745 && (pRegFrame->rsp - pvFault) < 32)
746 {
747 /* Fault caused by stack writes while trying to inject an interrupt event. */
748 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
749 return true;
750 }
751#else
752 NOREF(pVM); NOREF(pvFault);
753#endif
754
755 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
756
757 /* Non-supervisor mode write means it's used for something else. */
758 if (CPUMGetGuestCPL(pVCpu) != 0)
759 return true;
760
761 switch (pDis->pCurInstr->uOpcode)
762 {
763 /* call implies the actual push of the return address faulted */
764 case OP_CALL:
765 Log4(("pgmPoolMonitorIsReused: CALL\n"));
766 return true;
767 case OP_PUSH:
768 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
769 return true;
770 case OP_PUSHF:
771 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
772 return true;
773 case OP_PUSHA:
774 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
775 return true;
776 case OP_FXSAVE:
777 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
778 return true;
779 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
780 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
781 return true;
782 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
783 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
784 return true;
785 case OP_MOVSWD:
786 case OP_STOSWD:
787 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
788 && pRegFrame->rcx >= 0x40
789 )
790 {
791 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
792
793 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
794 return true;
795 }
796 return false;
797 }
798 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
799 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
800 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
801 {
802 Log4(("pgmPoolMonitorIsReused: ESP\n"));
803 return true;
804 }
805
806 return false;
807}
808
809
810/**
811 * Flushes the page being accessed.
812 *
813 * @returns VBox status code suitable for scheduling.
814 * @param pVM Pointer to the VM.
815 * @param pVCpu Pointer to the VMCPU.
816 * @param pPool The pool.
817 * @param pPage The pool page (head).
818 * @param pDis The disassembly of the write instruction.
819 * @param pRegFrame The trap register frame.
820 * @param GCPhysFault The fault address as guest physical address.
821 * @param pvFault The fault address.
822 * @todo VBOXSTRICTRC
823 */
824static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
825 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
826{
827 NOREF(GCPhysFault);
828
829 /*
830 * First, do the flushing.
831 */
832 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
833
834 /*
835 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
836 * Must do this in raw mode (!); XP boot will fail otherwise.
837 */
838 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
839 if (RT_SUCCESS(rc2))
840 AssertMsg(rc2 == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
841 else if (rc2 == VERR_EM_INTERPRETER)
842 {
843#ifdef IN_RC
844 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
845 {
846 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
847 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->eip));
848 rc = VINF_SUCCESS;
849 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
850 }
851 else
852#endif
853 {
854 rc = VINF_EM_RAW_EMULATE_INSTR;
855 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
856 }
857 }
858 else
859 rc = VBOXSTRICTRC_VAL(rc2);
860
861 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
862 return rc;
863}
864
865
866/**
867 * Handles the STOSD write accesses.
868 *
869 * @returns VBox status code suitable for scheduling.
870 * @param pVM Pointer to the VM.
871 * @param pPool The pool.
872 * @param pPage The pool page (head).
873 * @param pDis The disassembly of the write instruction.
874 * @param pRegFrame The trap register frame.
875 * @param GCPhysFault The fault address as guest physical address.
876 * @param pvFault The fault address.
877 */
878DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
879 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
880{
881 unsigned uIncrement = pDis->Param1.cb;
882 NOREF(pVM);
883
884 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
885 Assert(pRegFrame->rcx <= 0x20);
886
887#ifdef VBOX_STRICT
888 if (pDis->uOpMode == DISCPUMODE_32BIT)
889 Assert(uIncrement == 4);
890 else
891 Assert(uIncrement == 8);
892#endif
893
894 Log3(("pgmPoolAccessHandlerSTOSD\n"));
895
896 /*
897 * Increment the modification counter and insert it into the list
898 * of modified pages the first time.
899 */
900 if (!pPage->cModifications++)
901 pgmPoolMonitorModifiedInsert(pPool, pPage);
902
903 /*
904 * Execute REP STOSD.
905 *
906 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
907 * write situation, meaning that it's safe to write here.
908 */
909 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
910 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
911 while (pRegFrame->rcx)
912 {
913#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
914 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
915 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
916 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
917#else
918 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
919#endif
920#ifdef IN_RC
921 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
922#else
923 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
924#endif
925 pu32 += uIncrement;
926 GCPhysFault += uIncrement;
927 pRegFrame->rdi += uIncrement;
928 pRegFrame->rcx--;
929 }
930 pRegFrame->rip += pDis->cbInstr;
931
932 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
933 return VINF_SUCCESS;
934}
935
936
937/**
938 * Handles the simple write accesses.
939 *
940 * @returns VBox status code suitable for scheduling.
941 * @param pVM Pointer to the VM.
942 * @param pVCpu Pointer to the VMCPU.
943 * @param pPool The pool.
944 * @param pPage The pool page (head).
945 * @param pDis The disassembly of the write instruction.
946 * @param pRegFrame The trap register frame.
947 * @param GCPhysFault The fault address as guest physical address.
948 * @param pvFault The fault address.
949 * @param pfReused Reused state (in/out)
950 */
951DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
952 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
953{
954 Log3(("pgmPoolAccessHandlerSimple\n"));
955 NOREF(pfReused); /* initialized by caller */
956
957 /*
958 * Increment the modification counter and insert it into the list
959 * of modified pages the first time.
960 */
961 if (!pPage->cModifications++)
962 pgmPoolMonitorModifiedInsert(pPool, pPage);
963
964 /*
965 * Clear all the pages. ASSUMES that pvFault is readable.
966 */
967#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
968 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
969 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->Param1));
970 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
971#else
972 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->Param1));
973#endif
974
975 /*
976 * Interpret the instruction.
977 */
978 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
979 if (RT_SUCCESS(rc))
980 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
981 else if (rc == VERR_EM_INTERPRETER)
982 {
983 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
984 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
985 rc = VINF_EM_RAW_EMULATE_INSTR;
986 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
987 }
988
989#if 0 /* experimental code */
990 if (rc == VINF_SUCCESS)
991 {
992 switch (pPage->enmKind)
993 {
994 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
995 {
996 X86PTEPAE GstPte;
997 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
998 AssertRC(rc);
999
1000 /* Check the new value written by the guest. If present and with a bogus physical address, then
1001 * it's fairly safe to assume the guest is reusing the PT.
1002 */
1003 if (GstPte.n.u1Present)
1004 {
1005 RTHCPHYS HCPhys = -1;
1006 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1007 if (rc != VINF_SUCCESS)
1008 {
1009 *pfReused = true;
1010 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1011 }
1012 }
1013 break;
1014 }
1015 }
1016 }
1017#endif
1018
1019 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1020 return VBOXSTRICTRC_VAL(rc);
1021}
1022
1023
1024/**
1025 * \#PF Handler callback for PT write accesses.
1026 *
 * The PGM lock is taken right after entry and released on every return path.
 * Once the faulting instruction has been disassembled, one of the following
 * is done:
 *   - a simple (non-REP) write is handed to pgmPoolAccessHandlerSimple,
 *   - a small, aligned REP STOS is handed to pgmPoolAccessHandlerSTOSD,
 *   - in ring-0 builds with PGMPOOL_WITH_OPTIMIZED_DIRTY_PT, a PAE page table
 *     that exceeded its modification limit is added to the dirty page list
 *     and its write monitoring is temporarily switched off,
 *   - otherwise the pool page is flushed via pgmPoolAccessHandlerFlush.
 *
1027 * @returns VBox status code (appropriate for GC return).
 *          VINF_SUCCESS is returned without further work when the faulting
 *          physical page no longer matches the pool page or when the page is
 *          already marked dirty.  A failure to disassemble the current
 *          instruction is returned as-is.
1028 * @param pVM Pointer to the VM.
1029 * @param uErrorCode CPU Error code.
1030 * @param pRegFrame Trap register frame.
1031 * NULL on DMA and other non CPU access.
 * NOTE(review): pRegFrame is dereferenced unconditionally in the
 * body (first use is pRegFrame->rip), so the NULL case does not
 * look handled here - confirm callers never pass NULL.
1032 * @param pvFault The fault address (cr2).
1033 * @param GCPhysFault The GC physical address corresponding to pvFault.
1034 * @param pvUser User argument.
 * Cast to the pool page (PPGMPOOLPAGE) being monitored.
1035 */
1036DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault,
1037 RTGCPHYS GCPhysFault, void *pvUser)
1038{
1039 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1040 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1041 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1042 PVMCPU pVCpu = VMMGetCpu(pVM);
1043 unsigned cMaxModifications;
1044 bool fForcedFlush = false;
1045 NOREF(uErrorCode);
1046
1047 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1048
1049 pgmLock(pVM);
1050 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1051 {
1052 /* Pool page changed while we were waiting for the lock; ignore. */
1053 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1054 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1055 pgmUnlock(pVM);
1056 return VINF_SUCCESS;
1057 }
1058#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1059 if (pPage->fDirty)
1060 {
1061 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1062 pgmUnlock(pVM);
1063 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1064 }
1065#endif
1066
1067#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1068 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1069 {
1070 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1071 void *pvGst;
1072 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1073 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1074 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1075 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1076 }
1077#endif
1078
1079 /*
1080 * Disassemble the faulting instruction.
1081 */
1082 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1083 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1084 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1085 {
1086 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1087 pgmUnlock(pVM);
1088 return rc;
1089 }
1090
1091 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1092
1093 /*
1094 * We should ALWAYS have the list head as user parameter. This
1095 * is because we use that page to record the changes.
1096 */
1097 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1098
1099#ifdef IN_RING0
1100 /* Maximum nr of modifications depends on the page type. */
1101 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1102 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1103 cMaxModifications = 4;
1104 else
1105 cMaxModifications = 24;
1106#else
1107 cMaxModifications = 48;
1108#endif
1109
1110 /*
1111 * Incremental page table updates should weigh more than random ones.
1112 * (Only applies when started from offset 0)
1113 */
1114 pVCpu->pgm.s.cPoolAccessHandler++;
 /* The write counts as part of a sequential fill when the current RIP lies
  * within 0x40 bytes of the recorded one, the fault address is exactly one
  * operand size past the recorded fault address, and this is the very next
  * invocation of this handler on this VCPU.  The modification count is then
  * doubled; hitting the limit this way sets fForcedFlush, which excludes the
  * dirty page path further down. */
1115 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1116 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1117 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1118 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1119 {
1120 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1121 Assert(pPage->cModifications < 32000);
1122 pPage->cModifications = pPage->cModifications * 2;
1123 pPage->GCPtrLastAccessHandlerFault = pvFault;
1124 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1125 if (pPage->cModifications >= cMaxModifications)
1126 {
1127 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1128 fForcedFlush = true;
1129 }
1130 }
1131
1132 if (pPage->cModifications >= cMaxModifications)
1133 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1134
1135 /*
1136 * Check if it's worth dealing with.
 *
 * Note that fReused is assigned inside the condition below; its value is
 * consulted again after the flushPage label.
1137 */
1138 bool fReused = false;
1139 bool fNotReusedNotForking = false;
1140 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1141 || pgmPoolIsPageLocked(pPage)
1142 )
1143 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1144 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1145 {
1146 /*
1147 * Simple instructions, no REP prefix.
1148 */
1149 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1150 {
1151 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1152 if (fReused)
1153 goto flushPage;
1154
1155 /* A mov instruction to change the first page table entry will be remembered so we can detect
1156 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1157 */
1158 if ( rc == VINF_SUCCESS
1159 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1160 && pDis->pCurInstr->uOpcode == OP_MOV
1161 && (pvFault & PAGE_OFFSET_MASK) == 0)
1162 {
1163 pPage->GCPtrLastAccessHandlerFault = pvFault;
1164 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1165 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1166 /* Make sure we don't kick out a page too quickly. */
1167 if (pPage->cModifications > 8)
1168 pPage->cModifications = 2;
1169 }
1170 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1171 {
1172 /* ignore the 2nd write to this page table entry. */
1173 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1174 }
1175 else
1176 {
1177 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1178 pPage->GCPtrLastAccessHandlerRip = 0;
1179 }
1180
1181 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1182 pgmUnlock(pVM);
1183 return rc;
1184 }
1185
1186 /*
1187 * Windows is frequently doing small memset() operations (netio test 4k+).
1188 * We have to deal with these or we'll kill the cache and performance.
 *
 * Only forward (DF clear) stores using the default operand and address
 * size are taken, with at most 0x20 repetitions, a naturally aligned
 * destination, a range that stays within the faulting page, and a store
 * value of 0 or 0x80.
1189 */
1190 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1191 && !pRegFrame->eflags.Bits.u1DF
1192 && pDis->uOpMode == pDis->uCpuMode
1193 && pDis->uAddrMode == pDis->uCpuMode)
1194 {
1195 bool fValidStosd = false;
1196
1197 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1198 && pDis->fPrefix == DISPREFIX_REP
1199 && pRegFrame->ecx <= 0x20
1200 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1201 && !((uintptr_t)pvFault & 3)
1202 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1203 )
1204 {
1205 fValidStosd = true;
1206 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1207 }
1208 else
1209 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1210 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1211 && pRegFrame->rcx <= 0x20
1212 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1213 && !((uintptr_t)pvFault & 7)
1214 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1215 )
1216 {
1217 fValidStosd = true;
1218 }
1219
1220 if (fValidStosd)
1221 {
1222 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1223 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1224 pgmUnlock(pVM);
1225 return rc;
1226 }
1227 }
1228
1229 /* REP prefix, don't bother. */
1230 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1231 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1232 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
 /* The reuse and forking checks were already done (and negative) in the
  * condition above; remember that so they are not repeated below. */
1233 fNotReusedNotForking = true;
1234 }
1235
1236#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1237 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1238 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1239 */
1240 if ( pPage->cModifications >= cMaxModifications
1241 && !fForcedFlush
1242 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1243 && ( fNotReusedNotForking
1244 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1245 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1246 )
1247 )
1248 {
1249 Assert(!pgmPoolIsPageLocked(pPage));
1250 Assert(pPage->fDirty == false);
1251
1252 /* Flush any monitored duplicates as we will disable write protection. */
1253 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1254 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1255 {
1256 PPGMPOOLPAGE pPageHead = pPage;
1257
1258 /* Find the monitor head. */
1259 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1260 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1261
 /* Walk the chain from the head, flushing every page except pPage itself.
  * The next index is fetched before the flush call. */
1262 while (pPageHead)
1263 {
1264 unsigned idxNext = pPageHead->iMonitoredNext;
1265
1266 if (pPageHead != pPage)
1267 {
1268 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1269 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1270 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1271 AssertRC(rc2);
1272 }
1273
1274 if (idxNext == NIL_PGMPOOL_IDX)
1275 break;
1276
1277 pPageHead = &pPool->aPages[idxNext];
1278 }
1279 }
1280
1281 /* The flushing above might fail for locked pages, so double check. */
1282 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1283 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1284 {
1285 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1286
1287 /* Temporarily allow write access to the page table again. */
1288 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
 /* Any other status falls through to the flush path at the end of the function. */
1289 if (rc == VINF_SUCCESS)
1290 {
1291 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1292 AssertMsg(rc == VINF_SUCCESS
1293 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1294 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1295 || rc == VERR_PAGE_NOT_PRESENT,
1296 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1297# ifdef VBOX_STRICT
1298 pPage->GCPtrDirtyFault = pvFault;
1299# endif
1300
1301 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1302 pgmUnlock(pVM);
1303 return rc;
1304 }
1305 }
1306 }
1307#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1308
1309 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1310flushPage:
1311 /*
1312 * Not worth it, so flush it.
1313 *
1314 * If we considered it to be reused, don't go back to ring-3
1315 * to emulate failed instructions since we usually cannot
1316 * interpret then. This may be a bit risky, in which case
1317 * the reuse detection must be fixed.
1318 */
1319 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1320 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1321 && fReused)
1322 {
1323 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1324 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1325 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1326 }
1327 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1328 pgmUnlock(pVM);
1329 return rc;
1330}
1331
1332# endif /* !IN_RING3 */
1333
1334# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1335
1336# if defined(VBOX_STRICT) && !defined(IN_RING3)
1337
1338/**
1339 * Check references to guest physical memory in a PAE / PAE page table.
1340 *
1341 * @param pPool The pool.
1342 * @param pPage The page.
1343 * @param pShwPT The shadow page table (mapping of the page).
1344 * @param pGstPT The guest page table.
1345 */
1346static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1347{
1348 unsigned cErrors = 0;
1349 int LastRc = -1; /* initialized to shut up gcc */
1350 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1351 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1352 PVM pVM = pPool->CTX_SUFF(pVM);
1353
1354#ifdef VBOX_STRICT
1355 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1356 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1357#endif
1358 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1359 {
1360 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1361 {
1362 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1363 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1364 if ( rc != VINF_SUCCESS
1365 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1366 {
1367 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1368 LastPTE = i;
1369 LastRc = rc;
1370 LastHCPhys = HCPhys;
1371 cErrors++;
1372
1373 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1374 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1375 AssertRC(rc);
1376
1377 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1378 {
1379 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1380
1381 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1382 {
1383 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1384
1385 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1386 {
1387 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1388 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1389 {
1390 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1391 }
1392 }
1393
1394 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1395 }
1396 }
1397 }
1398 }
1399 }
1400 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1401}
1402
1403
1404/**
1405 * Check references to guest physical memory in a PAE / 32-bit page table.
1406 *
1407 * @param pPool The pool.
1408 * @param pPage The page.
1409 * @param pShwPT The shadow page table (mapping of the page).
1410 * @param pGstPT The guest page table.
1411 */
1412static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1413{
1414 unsigned cErrors = 0;
1415 int LastRc = -1; /* initialized to shut up gcc */
1416 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1417 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1418 PVM pVM = pPool->CTX_SUFF(pVM);
1419
1420#ifdef VBOX_STRICT
1421 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1422 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1423#endif
1424 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1425 {
1426 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1427 {
1428 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1429 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1430 if ( rc != VINF_SUCCESS
1431 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1432 {
1433 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1434 LastPTE = i;
1435 LastRc = rc;
1436 LastHCPhys = HCPhys;
1437 cErrors++;
1438
1439 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1440 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1441 AssertRC(rc);
1442
1443 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1444 {
1445 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1446
1447 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1448 {
1449 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1450
1451 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1452 {
1453 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1454 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1455 {
1456 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1457 }
1458 }
1459
1460 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1461 }
1462 }
1463 }
1464 }
1465 }
1466 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1467}
1468
1469# endif /* VBOX_STRICT && !IN_RING3 */
1470
1471/**
1472 * Clear references to guest physical memory in a PAE / PAE page table.
1473 *
1474 * @returns nr of changed PTEs
1475 * @param pPool The pool.
1476 * @param pPage The page.
1477 * @param pShwPT The shadow page table (mapping of the page).
1478 * @param pGstPT The guest page table.
1479 * @param pOldGstPT The old cached guest page table.
1480 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1481 * @param pfFlush Flush reused page table (out)
1482 */
1483DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1484 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1485{
1486 unsigned cChanged = 0;
1487
1488#ifdef VBOX_STRICT
1489 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1490 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1491#endif
1492 *pfFlush = false;
1493
1494 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1495 {
1496 /* Check the new value written by the guest. If present and with a bogus physical address, then
1497 * it's fairly safe to assume the guest is reusing the PT.
1498 */
1499 if ( fAllowRemoval
1500 && pGstPT->a[i].n.u1Present)
1501 {
1502 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1503 {
1504 *pfFlush = true;
1505 return ++cChanged;
1506 }
1507 }
1508 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1509 {
1510 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1511 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1512 {
1513#ifdef VBOX_STRICT
1514 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1515 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1516 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1517#endif
1518 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1519 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1520 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1521 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1522
1523 if ( uHostAttr == uGuestAttr
1524 && fHostRW <= fGuestRW)
1525 continue;
1526 }
1527 cChanged++;
1528 /* Something was changed, so flush it. */
1529 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1530 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1531 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1532 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1533 }
1534 }
1535 return cChanged;
1536}
1537
1538
1539/**
1540 * Clear references to guest physical memory in a PAE / PAE page table.
1541 *
1542 * @returns nr of changed PTEs
1543 * @param pPool The pool.
1544 * @param pPage The page.
1545 * @param pShwPT The shadow page table (mapping of the page).
1546 * @param pGstPT The guest page table.
1547 * @param pOldGstPT The old cached guest page table.
1548 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1549 * @param pfFlush Flush reused page table (out)
1550 */
1551DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1552 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1553{
1554 unsigned cChanged = 0;
1555
1556#ifdef VBOX_STRICT
1557 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1558 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1559#endif
1560 *pfFlush = false;
1561
1562 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1563 {
1564 /* Check the new value written by the guest. If present and with a bogus physical address, then
1565 * it's fairly safe to assume the guest is reusing the PT.
1566 */
1567 if ( fAllowRemoval
1568 && pGstPT->a[i].n.u1Present)
1569 {
1570 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1571 {
1572 *pfFlush = true;
1573 return ++cChanged;
1574 }
1575 }
1576 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1577 {
1578 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1579 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1580 {
1581#ifdef VBOX_STRICT
1582 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1583 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1584 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1585#endif
1586 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1587 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1588 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1589 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1590
1591 if ( uHostAttr == uGuestAttr
1592 && fHostRW <= fGuestRW)
1593 continue;
1594 }
1595 cChanged++;
1596 /* Something was changed, so flush it. */
1597 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1598 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1599 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1600 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1601 }
1602 }
1603 return cChanged;
1604}
1605
1606
1607/**
1608 * Flush a dirty page
1609 *
1610 * @param pVM Pointer to the VM.
1611 * @param pPool The pool.
1612 * @param idxSlot Dirty array slot index
1613 * @param fAllowRemoval Allow a reused page table to be removed
1614 */
1615static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1616{
1617 PPGMPOOLPAGE pPage;
1618 unsigned idxPage;
1619
1620 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1621 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1622 return;
1623
1624 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1625 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1626 pPage = &pPool->aPages[idxPage];
1627 Assert(pPage->idx == idxPage);
1628 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1629
1630 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1631 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1632
1633#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1634 PVMCPU pVCpu = VMMGetCpu(pVM);
1635 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1636#endif
1637
1638 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1639 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1640 Assert(rc == VINF_SUCCESS);
1641 pPage->fDirty = false;
1642
1643#ifdef VBOX_STRICT
1644 uint64_t fFlags = 0;
1645 RTHCPHYS HCPhys;
1646 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1647 AssertMsg( ( rc == VINF_SUCCESS
1648 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1649 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1650 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1651 || rc == VERR_PAGE_NOT_PRESENT,
1652 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1653#endif
1654
1655 /* Flush those PTEs that have changed. */
1656 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1657 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1658 void *pvGst;
1659 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1660 bool fFlush;
1661 unsigned cChanges;
1662
1663 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1664 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1665 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1666 else
1667 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1668 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1669
1670 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1671 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1672 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1673 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1674
1675 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1676 Assert(pPage->cModifications);
1677 if (cChanges < 4)
1678 pPage->cModifications = 1; /* must use > 0 here */
1679 else
1680 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1681
1682 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1683 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1684 pPool->idxFreeDirtyPage = idxSlot;
1685
1686 pPool->cDirtyPages--;
1687 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1688 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1689 if (fFlush)
1690 {
1691 Assert(fAllowRemoval);
1692 Log(("Flush reused page table!\n"));
1693 pgmPoolFlushPage(pPool, pPage);
1694 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1695 }
1696 else
1697 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1698
1699#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1700 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1701#endif
1702}
1703
1704
1705# ifndef IN_RING3
1706/**
1707 * Add a new dirty page
1708 *
1709 * @param pVM Pointer to the VM.
1710 * @param pPool The pool.
1711 * @param pPage The page.
1712 */
1713void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1714{
1715 unsigned idxFree;
1716
1717 PGM_LOCK_ASSERT_OWNER(pVM);
1718 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1719 Assert(!pPage->fDirty);
1720
1721 idxFree = pPool->idxFreeDirtyPage;
1722 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1723 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1724
1725 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1726 {
1727 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1728 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1729 }
1730 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1731 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1732
1733 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1734
1735 /*
1736 * Make a copy of the guest page table as we require valid GCPhys addresses
1737 * when removing references to physical pages.
1738 * (The HCPhys linear lookup is *extremely* expensive!)
1739 */
1740 void *pvGst;
1741 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1742 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1743# ifdef VBOX_STRICT
1744 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1745 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1746 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1747 else
1748 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1749 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1750# endif
1751 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1752
1753 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1754 pPage->fDirty = true;
1755 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1756 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1757 pPool->cDirtyPages++;
1758
1759 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1760 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1761 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1762 {
1763 unsigned i;
1764 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1765 {
1766 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1767 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1768 {
1769 pPool->idxFreeDirtyPage = idxFree;
1770 break;
1771 }
1772 }
1773 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1774 }
1775
1776 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1777 return;
1778}
1779# endif /* !IN_RING3 */
1780
1781
1782/**
1783 * Check if the specified page is dirty (not write monitored)
1784 *
1785 * @return dirty or not
1786 * @param pVM Pointer to the VM.
1787 * @param GCPhys Guest physical address
1788 */
1789bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1790{
1791 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1792 PGM_LOCK_ASSERT_OWNER(pVM);
1793 if (!pPool->cDirtyPages)
1794 return false;
1795
1796 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1797
1798 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1799 {
1800 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1801 {
1802 PPGMPOOLPAGE pPage;
1803 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1804
1805 pPage = &pPool->aPages[idxPage];
1806 if (pPage->GCPhys == GCPhys)
1807 return true;
1808 }
1809 }
1810 return false;
1811}
1812
1813
1814/**
1815 * Reset all dirty pages by reinstating page monitoring.
1816 *
1817 * @param pVM Pointer to the VM.
1818 */
1819void pgmPoolResetDirtyPages(PVM pVM)
1820{
1821 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1822 PGM_LOCK_ASSERT_OWNER(pVM);
1823 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1824
1825 if (!pPool->cDirtyPages)
1826 return;
1827
1828 Log(("pgmPoolResetDirtyPages\n"));
1829 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1830 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1831
1832 pPool->idxFreeDirtyPage = 0;
1833 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1834 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1835 {
1836 unsigned i;
1837 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1838 {
1839 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1840 {
1841 pPool->idxFreeDirtyPage = i;
1842 break;
1843 }
1844 }
1845 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1846 }
1847
1848 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1849 return;
1850}
1851
1852
1853/**
1854 * Invalidate the PT entry for the specified page
1855 *
1856 * @param pVM Pointer to the VM.
1857 * @param GCPtrPage Guest page to invalidate
1858 */
1859void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1860{
1861 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1862 PGM_LOCK_ASSERT_OWNER(pVM);
1863 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1864
1865 if (!pPool->cDirtyPages)
1866 return;
1867
1868 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
1869 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1870 {
1871 }
1872}
1873
1874
1875/**
1876 * Reset all dirty pages by reinstating page monitoring.
1877 *
1878 * @param pVM Pointer to the VM.
1879 * @param GCPhysPT Physical address of the page table
1880 */
1881void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1882{
1883 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1884 PGM_LOCK_ASSERT_OWNER(pVM);
1885 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1886 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1887
1888 if (!pPool->cDirtyPages)
1889 return;
1890
1891 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1892
1893 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1894 {
1895 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1896 {
1897 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1898
1899 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1900 if (pPage->GCPhys == GCPhysPT)
1901 {
1902 idxDirtyPage = i;
1903 break;
1904 }
1905 }
1906 }
1907
1908 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1909 {
1910 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1911 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1912 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1913 {
1914 unsigned i;
1915 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1916 {
1917 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1918 {
1919 pPool->idxFreeDirtyPage = i;
1920 break;
1921 }
1922 }
1923 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1924 }
1925 }
1926}
1927
1928# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1929
1930/**
1931 * Inserts a page into the GCPhys hash table.
1932 *
1933 * @param pPool The pool.
1934 * @param pPage The page.
1935 */
1936DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1937{
1938 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1939 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1940 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1941 pPage->iNext = pPool->aiHash[iHash];
1942 pPool->aiHash[iHash] = pPage->idx;
1943}
1944
1945
1946/**
1947 * Removes a page from the GCPhys hash table.
1948 *
1949 * @param pPool The pool.
1950 * @param pPage The page.
1951 */
1952DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1953{
1954 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1955 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1956 if (pPool->aiHash[iHash] == pPage->idx)
1957 pPool->aiHash[iHash] = pPage->iNext;
1958 else
1959 {
1960 uint16_t iPrev = pPool->aiHash[iHash];
1961 for (;;)
1962 {
1963 const int16_t i = pPool->aPages[iPrev].iNext;
1964 if (i == pPage->idx)
1965 {
1966 pPool->aPages[iPrev].iNext = pPage->iNext;
1967 break;
1968 }
1969 if (i == NIL_PGMPOOL_IDX)
1970 {
1971 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1972 break;
1973 }
1974 iPrev = i;
1975 }
1976 }
1977 pPage->iNext = NIL_PGMPOOL_IDX;
1978}
1979
1980
1981/**
1982 * Frees up one cache page.
1983 *
1984 * @returns VBox status code.
1985 * @retval VINF_SUCCESS on success.
1986 * @param pPool The pool.
1987 * @param iUser The user index.
1988 */
1989static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1990{
1991#ifndef IN_RC
1992 const PVM pVM = pPool->CTX_SUFF(pVM);
1993#endif
1994 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
1995 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1996
1997 /*
1998 * Select one page from the tail of the age list.
1999 */
2000 PPGMPOOLPAGE pPage;
2001 for (unsigned iLoop = 0; ; iLoop++)
2002 {
2003 uint16_t iToFree = pPool->iAgeTail;
2004 if (iToFree == iUser)
2005 iToFree = pPool->aPages[iToFree].iAgePrev;
2006/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2007 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2008 {
2009 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2010 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2011 {
2012 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2013 continue;
2014 iToFree = i;
2015 break;
2016 }
2017 }
2018*/
2019 Assert(iToFree != iUser);
2020 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2021 pPage = &pPool->aPages[iToFree];
2022
2023 /*
2024 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2025 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2026 */
2027 if (!pgmPoolIsPageLocked(pPage))
2028 break;
2029 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2030 pgmPoolCacheUsed(pPool, pPage);
2031 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2032 }
2033
2034 /*
2035 * Found a usable page, flush it and return.
2036 */
2037 int rc = pgmPoolFlushPage(pPool, pPage);
2038 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2039 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2040 if (rc == VINF_SUCCESS)
2041 PGM_INVL_ALL_VCPU_TLBS(pVM);
2042 return rc;
2043}
2044
2045
2046/**
2047 * Checks if a kind mismatch is really a page being reused
2048 * or if it's just normal remappings.
2049 *
2050 * @returns true if reused and the cached page (enmKind1) should be flushed
2051 * @returns false if not reused.
2052 * @param enmKind1 The kind of the cached page.
2053 * @param enmKind2 The kind of the requested page.
2054 */
2055static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2056{
2057 switch (enmKind1)
2058 {
2059 /*
2060 * Never reuse them. There is no remapping in non-paging mode.
2061 */
2062 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2063 case PGMPOOLKIND_32BIT_PD_PHYS:
2064 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2065 case PGMPOOLKIND_PAE_PD_PHYS:
2066 case PGMPOOLKIND_PAE_PDPT_PHYS:
2067 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2068 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2069 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2070 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2071 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2072 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2073 return false;
2074
2075 /*
2076 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2077 */
2078 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2079 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2080 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2081 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2082 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2083 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2084 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2085 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2086 case PGMPOOLKIND_32BIT_PD:
2087 case PGMPOOLKIND_PAE_PDPT:
2088 switch (enmKind2)
2089 {
2090 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2091 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2092 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2093 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2094 case PGMPOOLKIND_64BIT_PML4:
2095 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2096 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2097 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2098 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2099 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2100 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2101 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2102 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2103 return true;
2104 default:
2105 return false;
2106 }
2107
2108 /*
2109 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2110 */
2111 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2112 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2113 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2114 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2115 case PGMPOOLKIND_64BIT_PML4:
2116 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2117 switch (enmKind2)
2118 {
2119 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2120 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2121 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2122 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2123 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2124 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2125 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2126 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2127 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2128 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2129 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2130 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2131 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2132 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2133 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2134 return true;
2135 default:
2136 return false;
2137 }
2138
2139 /*
2140 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2141 */
2142 case PGMPOOLKIND_ROOT_NESTED:
2143 return false;
2144
2145 default:
2146 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2147 }
2148}
2149
2150
2151/**
2152 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2153 *
2154 * @returns VBox status code.
2155 * @retval VINF_PGM_CACHED_PAGE on success.
2156 * @retval VERR_FILE_NOT_FOUND if not found.
2157 * @param pPool The pool.
2158 * @param GCPhys The GC physical address of the page we're gonna shadow.
2159 * @param enmKind The kind of mapping.
2160 * @param enmAccess Access type for the mapping (only relevant for big pages)
2161 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2162 * @param iUser The shadow page pool index of the user table.
2163 * @param iUserTable The index into the user table (shadowed).
2164 * @param ppPage Where to store the pointer to the page.
2165 */
2166static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2167 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2168{
2169 /*
2170 * Look up the GCPhys in the hash.
2171 */
2172 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2173 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2174 if (i != NIL_PGMPOOL_IDX)
2175 {
2176 do
2177 {
2178 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2179 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2180 if (pPage->GCPhys == GCPhys)
2181 {
2182 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2183 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2184 && pPage->fA20Enabled == fA20Enabled)
2185 {
2186 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2187 * doesn't flush it in case there are no more free use records.
2188 */
2189 pgmPoolCacheUsed(pPool, pPage);
2190
2191 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2192 if (RT_SUCCESS(rc))
2193 {
2194 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2195 *ppPage = pPage;
2196 if (pPage->cModifications)
2197 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2198 STAM_COUNTER_INC(&pPool->StatCacheHits);
2199 return VINF_PGM_CACHED_PAGE;
2200 }
2201 return rc;
2202 }
2203
2204 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2205 {
2206 /*
2207 * The kind is different. In some cases we should now flush the page
2208 * as it has been reused, but in most cases this is normal remapping
2209 * of PDs as PT or big pages using the GCPhys field in a slightly
2210 * different way than the other kinds.
2211 */
2212 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2213 {
2214 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2215 pgmPoolFlushPage(pPool, pPage);
2216 break;
2217 }
2218 }
2219 }
2220
2221 /* next */
2222 i = pPage->iNext;
2223 } while (i != NIL_PGMPOOL_IDX);
2224 }
2225
2226 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2227 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2228 return VERR_FILE_NOT_FOUND;
2229}
2230
2231
2232/**
2233 * Inserts a page into the cache.
2234 *
2235 * @param pPool The pool.
2236 * @param pPage The cached page.
2237 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2238 */
2239static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2240{
2241 /*
2242 * Insert into the GCPhys hash if the page is fit for that.
2243 */
2244 Assert(!pPage->fCached);
2245 if (fCanBeCached)
2246 {
2247 pPage->fCached = true;
2248 pgmPoolHashInsert(pPool, pPage);
2249 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2250 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2251 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2252 }
2253 else
2254 {
2255 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2256 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2257 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2258 }
2259
2260 /*
2261 * Insert at the head of the age list.
2262 */
2263 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2264 pPage->iAgeNext = pPool->iAgeHead;
2265 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2266 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2267 else
2268 pPool->iAgeTail = pPage->idx;
2269 pPool->iAgeHead = pPage->idx;
2270}
2271
2272
2273/**
2274 * Flushes a cached page.
2275 *
2276 * @param pPool The pool.
2277 * @param pPage The cached page.
2278 */
2279static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2280{
2281 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2282
2283 /*
2284 * Remove the page from the hash.
2285 */
2286 if (pPage->fCached)
2287 {
2288 pPage->fCached = false;
2289 pgmPoolHashRemove(pPool, pPage);
2290 }
2291 else
2292 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2293
2294 /*
2295 * Remove it from the age list.
2296 */
2297 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2298 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2299 else
2300 pPool->iAgeTail = pPage->iAgePrev;
2301 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2302 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2303 else
2304 pPool->iAgeHead = pPage->iAgeNext;
2305 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2306 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2307}
2308
2309
2310/**
2311 * Looks for pages sharing the monitor.
2312 *
2313 * @returns Pointer to the head page.
2314 * @returns NULL if not found.
2315 * @param pPool The Pool
2316 * @param pNewPage The page which is going to be monitored.
2317 */
2318static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2319{
2320 /*
2321 * Look up the GCPhys in the hash.
2322 */
2323 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2324 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2325 if (i == NIL_PGMPOOL_IDX)
2326 return NULL;
2327 do
2328 {
2329 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2330 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2331 && pPage != pNewPage)
2332 {
2333 switch (pPage->enmKind)
2334 {
2335 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2336 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2337 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2338 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2339 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2340 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2341 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2342 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2343 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2344 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2345 case PGMPOOLKIND_64BIT_PML4:
2346 case PGMPOOLKIND_32BIT_PD:
2347 case PGMPOOLKIND_PAE_PDPT:
2348 {
2349 /* find the head */
2350 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2351 {
2352 Assert(pPage->iMonitoredPrev != pPage->idx);
2353 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2354 }
2355 return pPage;
2356 }
2357
2358 /* ignore, no monitoring. */
2359 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2360 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2361 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2362 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2363 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2364 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2365 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2366 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2367 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2368 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2369 case PGMPOOLKIND_ROOT_NESTED:
2370 case PGMPOOLKIND_PAE_PD_PHYS:
2371 case PGMPOOLKIND_PAE_PDPT_PHYS:
2372 case PGMPOOLKIND_32BIT_PD_PHYS:
2373 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2374 break;
2375 default:
2376 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2377 }
2378 }
2379
2380 /* next */
2381 i = pPage->iNext;
2382 } while (i != NIL_PGMPOOL_IDX);
2383 return NULL;
2384}
2385
2386
2387/**
2388 * Enabled write monitoring of a guest page.
2389 *
2390 * @returns VBox status code.
2391 * @retval VINF_SUCCESS on success.
2392 * @param pPool The pool.
2393 * @param pPage The cached page.
2394 */
2395static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2396{
2397 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2398
2399 /*
2400 * Filter out the relevant kinds.
2401 */
2402 switch (pPage->enmKind)
2403 {
2404 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2405 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2406 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2407 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2408 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2409 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2410 case PGMPOOLKIND_64BIT_PML4:
2411 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2412 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2413 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2414 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2415 case PGMPOOLKIND_32BIT_PD:
2416 case PGMPOOLKIND_PAE_PDPT:
2417 break;
2418
2419 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2420 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2421 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2422 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2423 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2424 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2425 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2426 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2427 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2428 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2429 case PGMPOOLKIND_ROOT_NESTED:
2430 /* Nothing to monitor here. */
2431 return VINF_SUCCESS;
2432
2433 case PGMPOOLKIND_32BIT_PD_PHYS:
2434 case PGMPOOLKIND_PAE_PDPT_PHYS:
2435 case PGMPOOLKIND_PAE_PD_PHYS:
2436 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2437 /* Nothing to monitor here. */
2438 return VINF_SUCCESS;
2439 default:
2440 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2441 }
2442
2443 /*
2444 * Install handler.
2445 */
2446 int rc;
2447 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2448 if (pPageHead)
2449 {
2450 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2451 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2452
2453#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2454 if (pPageHead->fDirty)
2455 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2456#endif
2457
2458 pPage->iMonitoredPrev = pPageHead->idx;
2459 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2460 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2461 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2462 pPageHead->iMonitoredNext = pPage->idx;
2463 rc = VINF_SUCCESS;
2464 }
2465 else
2466 {
2467 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2468 PVM pVM = pPool->CTX_SUFF(pVM);
2469 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2470 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2471 GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK,
2472 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2473 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2474 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2475 pPool->pszAccessHandler);
2476 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2477 * the heap size should suffice. */
2478 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2479 PVMCPU pVCpu = VMMGetCpu(pVM);
2480 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2481 }
2482 pPage->fMonitored = true;
2483 return rc;
2484}
2485
2486
2487/**
2488 * Disables write monitoring of a guest page.
2489 *
2490 * @returns VBox status code.
2491 * @retval VINF_SUCCESS on success.
2492 * @param pPool The pool.
2493 * @param pPage The cached page.
2494 */
2495static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2496{
2497 /*
2498 * Filter out the relevant kinds.
2499 */
2500 switch (pPage->enmKind)
2501 {
2502 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2503 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2504 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2505 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2506 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2507 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2508 case PGMPOOLKIND_64BIT_PML4:
2509 case PGMPOOLKIND_32BIT_PD:
2510 case PGMPOOLKIND_PAE_PDPT:
2511 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2512 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2513 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2514 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2515 break;
2516
2517 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2518 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2519 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2520 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2521 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2522 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2523 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2524 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2525 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2526 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2527 case PGMPOOLKIND_ROOT_NESTED:
2528 case PGMPOOLKIND_PAE_PD_PHYS:
2529 case PGMPOOLKIND_PAE_PDPT_PHYS:
2530 case PGMPOOLKIND_32BIT_PD_PHYS:
2531 /* Nothing to monitor here. */
2532 Assert(!pPage->fMonitored);
2533 return VINF_SUCCESS;
2534
2535 default:
2536 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2537 }
2538 Assert(pPage->fMonitored);
2539
2540 /*
2541 * Remove the page from the monitored list or uninstall it if last.
2542 */
2543 const PVM pVM = pPool->CTX_SUFF(pVM);
2544 int rc;
2545 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2546 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2547 {
2548 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2549 {
2550 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2551 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2552 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2553 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2554 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2555 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2556 pPool->pszAccessHandler);
2557 AssertFatalRCSuccess(rc);
2558 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2559 }
2560 else
2561 {
2562 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2563 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2564 {
2565 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2566 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2567 }
2568 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2569 rc = VINF_SUCCESS;
2570 }
2571 }
2572 else
2573 {
2574 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2575 AssertFatalRC(rc);
2576 PVMCPU pVCpu = VMMGetCpu(pVM);
2577 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2578 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2579 }
2580 pPage->fMonitored = false;
2581
2582 /*
2583 * Remove it from the list of modified pages (if in it).
2584 */
2585 pgmPoolMonitorModifiedRemove(pPool, pPage);
2586
2587 return rc;
2588}
2589
2590
2591/**
2592 * Inserts the page into the list of modified pages.
2593 *
2594 * @param pPool The pool.
2595 * @param pPage The page.
2596 */
2597void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2598{
2599 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2600 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2601 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2602 && pPool->iModifiedHead != pPage->idx,
2603 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2604 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2605 pPool->iModifiedHead, pPool->cModifiedPages));
2606
2607 pPage->iModifiedNext = pPool->iModifiedHead;
2608 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2609 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2610 pPool->iModifiedHead = pPage->idx;
2611 pPool->cModifiedPages++;
2612#ifdef VBOX_WITH_STATISTICS
2613 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2614 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2615#endif
2616}
2617
2618
2619/**
2620 * Removes the page from the list of modified pages and resets the
2621 * modification counter.
2622 *
2623 * @param pPool The pool.
2624 * @param pPage The page which is believed to be in the list of modified pages.
2625 */
2626static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2627{
2628 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2629 if (pPool->iModifiedHead == pPage->idx)
2630 {
2631 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2632 pPool->iModifiedHead = pPage->iModifiedNext;
2633 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2634 {
2635 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2636 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2637 }
2638 pPool->cModifiedPages--;
2639 }
2640 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2641 {
2642 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2643 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2644 {
2645 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2646 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2647 }
2648 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2649 pPool->cModifiedPages--;
2650 }
2651 else
2652 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2653 pPage->cModifications = 0;
2654}
2655
2656
2657/**
2658 * Zaps the list of modified pages, resetting their modification counters in the process.
2659 *
2660 * @param pVM Pointer to the VM.
2661 */
2662static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2663{
2664 pgmLock(pVM);
2665 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2666 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2667
2668 unsigned cPages = 0; NOREF(cPages);
2669
2670#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2671 pgmPoolResetDirtyPages(pVM);
2672#endif
2673
2674 uint16_t idx = pPool->iModifiedHead;
2675 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2676 while (idx != NIL_PGMPOOL_IDX)
2677 {
2678 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2679 idx = pPage->iModifiedNext;
2680 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2681 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2682 pPage->cModifications = 0;
2683 Assert(++cPages);
2684 }
2685 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2686 pPool->cModifiedPages = 0;
2687 pgmUnlock(pVM);
2688}
2689
2690
2691/**
2692 * Handle SyncCR3 pool tasks
2693 *
2694 * @returns VBox status code.
2695 * @retval VINF_SUCCESS if successfully added.
2696 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
2697 * @param pVCpu Pointer to the VMCPU.
2698 * @remark Should only be used when monitoring is available, thus placed in
2699 * the PGMPOOL_WITH_MONITORING #ifdef.
2700 */
2701int pgmPoolSyncCR3(PVMCPU pVCpu)
2702{
2703 PVM pVM = pVCpu->CTX_SUFF(pVM);
2704 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2705
2706 /*
2707 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2708 * Occasionally we will have to clear all the shadow page tables because we wanted
2709 * to monitor a page which was mapped by too many shadowed page tables. This operation
2710 * sometimes referred to as a 'lightweight flush'.
2711 */
2712# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2713 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2714 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2715# else /* !IN_RING3 */
2716 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2717 {
2718 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2719 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2720
2721 /* Make sure all other VCPUs return to ring 3. */
2722 if (pVM->cCpus > 1)
2723 {
2724 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2725 PGM_INVL_ALL_VCPU_TLBS(pVM);
2726 }
2727 return VINF_PGM_SYNC_CR3;
2728 }
2729# endif /* !IN_RING3 */
2730 else
2731 {
2732 pgmPoolMonitorModifiedClearAll(pVM);
2733
2734 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2735 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2736 {
2737 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2738 return pgmPoolSyncCR3(pVCpu);
2739 }
2740 }
2741 return VINF_SUCCESS;
2742}
2743
2744
2745/**
2746 * Frees up at least one user entry.
2747 *
2748 * @returns VBox status code.
2749 * @retval VINF_SUCCESS if successfully added.
2750 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2751 * @param pPool The pool.
2752 * @param iUser The user index.
2753 */
2754static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2755{
2756 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2757 /*
2758 * Just free cached pages in a braindead fashion.
2759 */
2760 /** @todo walk the age list backwards and free the first with usage. */
2761 int rc = VINF_SUCCESS;
2762 do
2763 {
2764 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2765 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2766 rc = rc2;
2767 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2768 return rc;
2769}
2770
2771
2772/**
2773 * Inserts a page into the cache.
2774 *
2775 * This will create user node for the page, insert it into the GCPhys
2776 * hash, and insert it into the age list.
2777 *
2778 * @returns VBox status code.
2779 * @retval VINF_SUCCESS if successfully added.
2780 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2781 * @param pPool The pool.
2782 * @param pPage The cached page.
2783 * @param GCPhys The GC physical address of the page we're gonna shadow.
2784 * @param iUser The user index.
2785 * @param iUserTable The user table index.
2786 */
2787DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2788{
2789 int rc = VINF_SUCCESS;
2790 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2791
2792 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2793
2794#ifdef VBOX_STRICT
2795 /*
2796 * Check that the entry doesn't already exists.
2797 */
2798 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2799 {
2800 uint16_t i = pPage->iUserHead;
2801 do
2802 {
2803 Assert(i < pPool->cMaxUsers);
2804 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2805 i = paUsers[i].iNext;
2806 } while (i != NIL_PGMPOOL_USER_INDEX);
2807 }
2808#endif
2809
2810 /*
2811 * Find free a user node.
2812 */
2813 uint16_t i = pPool->iUserFreeHead;
2814 if (i == NIL_PGMPOOL_USER_INDEX)
2815 {
2816 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2817 if (RT_FAILURE(rc))
2818 return rc;
2819 i = pPool->iUserFreeHead;
2820 }
2821
2822 /*
2823 * Unlink the user node from the free list,
2824 * initialize and insert it into the user list.
2825 */
2826 pPool->iUserFreeHead = paUsers[i].iNext;
2827 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2828 paUsers[i].iUser = iUser;
2829 paUsers[i].iUserTable = iUserTable;
2830 pPage->iUserHead = i;
2831
2832 /*
2833 * Insert into cache and enable monitoring of the guest page if enabled.
2834 *
2835 * Until we implement caching of all levels, including the CR3 one, we'll
2836 * have to make sure we don't try monitor & cache any recursive reuse of
2837 * a monitored CR3 page. Because all windows versions are doing this we'll
2838 * have to be able to do combined access monitoring, CR3 + PT and
2839 * PD + PT (guest PAE).
2840 *
2841 * Update:
2842 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2843 */
2844 const bool fCanBeMonitored = true;
2845 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2846 if (fCanBeMonitored)
2847 {
2848 rc = pgmPoolMonitorInsert(pPool, pPage);
2849 AssertRC(rc);
2850 }
2851 return rc;
2852}
2853
2854
2855/**
2856 * Adds a user reference to a page.
2857 *
2858 * This will move the page to the head of the
2859 *
2860 * @returns VBox status code.
2861 * @retval VINF_SUCCESS if successfully added.
2862 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2863 * @param pPool The pool.
2864 * @param pPage The cached page.
2865 * @param iUser The user index.
2866 * @param iUserTable The user table.
2867 */
2868static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2869{
2870 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2871
2872 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2873
2874# ifdef VBOX_STRICT
2875 /*
2876 * Check that the entry doesn't already exists. We only allow multiple
2877 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2878 */
2879 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2880 {
2881 uint16_t i = pPage->iUserHead;
2882 do
2883 {
2884 Assert(i < pPool->cMaxUsers);
2885 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2886 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2887 i = paUsers[i].iNext;
2888 } while (i != NIL_PGMPOOL_USER_INDEX);
2889 }
2890# endif
2891
2892 /*
2893 * Allocate a user node.
2894 */
2895 uint16_t i = pPool->iUserFreeHead;
2896 if (i == NIL_PGMPOOL_USER_INDEX)
2897 {
2898 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2899 if (RT_FAILURE(rc))
2900 return rc;
2901 i = pPool->iUserFreeHead;
2902 }
2903 pPool->iUserFreeHead = paUsers[i].iNext;
2904
2905 /*
2906 * Initialize the user node and insert it.
2907 */
2908 paUsers[i].iNext = pPage->iUserHead;
2909 paUsers[i].iUser = iUser;
2910 paUsers[i].iUserTable = iUserTable;
2911 pPage->iUserHead = i;
2912
2913# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2914 if (pPage->fDirty)
2915 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2916# endif
2917
2918 /*
2919 * Tell the cache to update its replacement stats for this page.
2920 */
2921 pgmPoolCacheUsed(pPool, pPage);
2922 return VINF_SUCCESS;
2923}
2924
2925
2926/**
2927 * Frees a user record associated with a page.
2928 *
2929 * This does not clear the entry in the user table, it simply replaces the
2930 * user record to the chain of free records.
2931 *
2932 * @param pPool The pool.
2933 * @param HCPhys The HC physical address of the shadow page.
2934 * @param iUser The shadow page pool index of the user table.
2935 * @param iUserTable The index into the user table (shadowed).
2936 */
2937static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2938{
2939 /*
2940 * Unlink and free the specified user entry.
2941 */
2942 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2943
2944 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2945 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2946 uint16_t i = pPage->iUserHead;
2947 if ( i != NIL_PGMPOOL_USER_INDEX
2948 && paUsers[i].iUser == iUser
2949 && paUsers[i].iUserTable == iUserTable)
2950 {
2951 pPage->iUserHead = paUsers[i].iNext;
2952
2953 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2954 paUsers[i].iNext = pPool->iUserFreeHead;
2955 pPool->iUserFreeHead = i;
2956 return;
2957 }
2958
2959 /* General: Linear search. */
2960 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2961 while (i != NIL_PGMPOOL_USER_INDEX)
2962 {
2963 if ( paUsers[i].iUser == iUser
2964 && paUsers[i].iUserTable == iUserTable)
2965 {
2966 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2967 paUsers[iPrev].iNext = paUsers[i].iNext;
2968 else
2969 pPage->iUserHead = paUsers[i].iNext;
2970
2971 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2972 paUsers[i].iNext = pPool->iUserFreeHead;
2973 pPool->iUserFreeHead = i;
2974 return;
2975 }
2976 iPrev = i;
2977 i = paUsers[i].iNext;
2978 }
2979
2980 /* Fatal: didn't find it */
2981 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
2982 iUser, iUserTable, pPage->GCPhys));
2983}
2984
2985
2986/**
2987 * Gets the entry size of a shadow table.
2988 *
2989 * @param enmKind The kind of page.
2990 *
2991 * @returns The size of the entry in bytes. That is, 4 or 8.
2992 * @returns If the kind is not for a table, an assertion is raised and 0 is
2993 * returned.
2994 */
2995DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2996{
2997 switch (enmKind)
2998 {
2999 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3000 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3001 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3002 case PGMPOOLKIND_32BIT_PD:
3003 case PGMPOOLKIND_32BIT_PD_PHYS:
3004 return 4;
3005
3006 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3007 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3008 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3009 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3010 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3011 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3012 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3013 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3014 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3015 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3016 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3017 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3018 case PGMPOOLKIND_64BIT_PML4:
3019 case PGMPOOLKIND_PAE_PDPT:
3020 case PGMPOOLKIND_ROOT_NESTED:
3021 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3022 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3023 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3024 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3025 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3026 case PGMPOOLKIND_PAE_PD_PHYS:
3027 case PGMPOOLKIND_PAE_PDPT_PHYS:
3028 return 8;
3029
3030 default:
3031 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3032 }
3033}
3034
3035
3036/**
3037 * Gets the entry size of a guest table.
3038 *
3039 * @param enmKind The kind of page.
3040 *
3041 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3042 * @returns If the kind is not for a table, an assertion is raised and 0 is
3043 * returned.
3044 */
3045DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3046{
3047 switch (enmKind)
3048 {
3049 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3050 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3051 case PGMPOOLKIND_32BIT_PD:
3052 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3053 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3054 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3055 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3056 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3057 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3058 return 4;
3059
3060 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3061 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3062 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3063 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3064 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3065 case PGMPOOLKIND_64BIT_PML4:
3066 case PGMPOOLKIND_PAE_PDPT:
3067 return 8;
3068
3069 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3070 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3071 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3072 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3073 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3074 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3075 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3076 case PGMPOOLKIND_ROOT_NESTED:
3077 case PGMPOOLKIND_PAE_PD_PHYS:
3078 case PGMPOOLKIND_PAE_PDPT_PHYS:
3079 case PGMPOOLKIND_32BIT_PD_PHYS:
3080 /** @todo can we return 0? (nobody is calling this...) */
3081 AssertFailed();
3082 return 0;
3083
3084 default:
3085 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3086 }
3087}
3088
3089
3090/**
3091 * Checks one shadow page table entry for a mapping of a physical page.
3092 *
3093 * @returns true / false indicating removal of all relevant PTEs
3094 *
3095 * @param pVM Pointer to the VM.
3096 * @param pPhysPage The guest page in question.
3097 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3098 * @param iShw The shadow page table.
3099 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3100 */
3101static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3102{
3103 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3104 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3105 bool fRet = false;
3106
3107 /*
3108 * Assert sanity.
3109 */
3110 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3111 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3112 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3113
3114 /*
3115 * Then, clear the actual mappings to the page in the shadow PT.
3116 */
3117 switch (pPage->enmKind)
3118 {
3119 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3120 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3121 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3122 {
3123 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3124 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3125 uint32_t u32AndMask = 0;
3126 uint32_t u32OrMask = 0;
3127
3128 if (!fFlushPTEs)
3129 {
3130 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3131 {
3132 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3133 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3134 u32OrMask = X86_PTE_RW;
3135 u32AndMask = UINT32_MAX;
3136 fRet = true;
3137 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3138 break;
3139
3140 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3141 u32OrMask = 0;
3142 u32AndMask = ~X86_PTE_RW;
3143 fRet = true;
3144 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3145 break;
3146 default:
3147 /* (shouldn't be here, will assert below) */
3148 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3149 break;
3150 }
3151 }
3152 else
3153 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3154
3155 /* Update the counter if we're removing references. */
3156 if (!u32AndMask)
3157 {
3158 Assert(pPage->cPresent);
3159 Assert(pPool->cPresent);
3160 pPage->cPresent--;
3161 pPool->cPresent--;
3162 }
3163
3164 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3165 {
3166 X86PTE Pte;
3167
3168 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3169 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3170 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3171 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3172
3173 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3174 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3175 return fRet;
3176 }
3177#ifdef LOG_ENABLED
3178 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3179 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3180 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3181 {
3182 Log(("i=%d cFound=%d\n", i, ++cFound));
3183 }
3184#endif
3185 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3186 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3187 break;
3188 }
3189
3190 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3191 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3192 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3193 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3194 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3195 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3196 {
3197 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3198 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3199 uint64_t u64OrMask = 0;
3200 uint64_t u64AndMask = 0;
3201
3202 if (!fFlushPTEs)
3203 {
3204 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3205 {
3206 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3207 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3208 u64OrMask = X86_PTE_RW;
3209 u64AndMask = UINT64_MAX;
3210 fRet = true;
3211 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3212 break;
3213
3214 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3215 u64OrMask = 0;
3216 u64AndMask = ~(uint64_t)X86_PTE_RW;
3217 fRet = true;
3218 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3219 break;
3220
3221 default:
3222 /* (shouldn't be here, will assert below) */
3223 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3224 break;
3225 }
3226 }
3227 else
3228 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3229
3230 /* Update the counter if we're removing references. */
3231 if (!u64AndMask)
3232 {
3233 Assert(pPage->cPresent);
3234 Assert(pPool->cPresent);
3235 pPage->cPresent--;
3236 pPool->cPresent--;
3237 }
3238
3239 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3240 {
3241 X86PTEPAE Pte;
3242
3243 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3244 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3245 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3246 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3247
3248 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3249 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3250 return fRet;
3251 }
3252#ifdef LOG_ENABLED
3253 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3254 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3255 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3256 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3257 Log(("i=%d cFound=%d\n", i, ++cFound));
3258#endif
3259 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3260 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3261 break;
3262 }
3263
3264#ifdef PGM_WITH_LARGE_PAGES
3265 /* Large page case only. */
3266 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3267 {
3268 Assert(pVM->pgm.s.fNestedPaging);
3269
3270 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3271 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3272
3273 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3274 {
3275 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3276 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3277 pPD->a[iPte].u = 0;
3278 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3279
3280 /* Update the counter as we're removing references. */
3281 Assert(pPage->cPresent);
3282 Assert(pPool->cPresent);
3283 pPage->cPresent--;
3284 pPool->cPresent--;
3285
3286 return fRet;
3287 }
3288# ifdef LOG_ENABLED
3289 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3290 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3291 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3292 Log(("i=%d cFound=%d\n", i, ++cFound));
3293# endif
3294 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3295 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3296 break;
3297 }
3298
3299 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3300 case PGMPOOLKIND_PAE_PD_PHYS:
3301 {
3302 Assert(pVM->pgm.s.fNestedPaging);
3303
3304 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3305 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3306
3307 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3308 {
3309 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3310 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3311 pPD->a[iPte].u = 0;
3312 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3313
3314 /* Update the counter as we're removing references. */
3315 Assert(pPage->cPresent);
3316 Assert(pPool->cPresent);
3317 pPage->cPresent--;
3318 pPool->cPresent--;
3319 return fRet;
3320 }
3321# ifdef LOG_ENABLED
3322 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3323 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3324 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3325 Log(("i=%d cFound=%d\n", i, ++cFound));
3326# endif
3327 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3328 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3329 break;
3330 }
3331#endif /* PGM_WITH_LARGE_PAGES */
3332
3333 default:
3334 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3335 }
3336
3337 /* not reached. */
3338#ifndef _MSC_VER
3339 return fRet;
3340#endif
3341}
3342
3343
3344/**
3345 * Scans one shadow page table for mappings of a physical page.
3346 *
3347 * @param pVM Pointer to the VM.
3348 * @param pPhysPage The guest page in question.
3349 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3350 * @param iShw The shadow page table.
3351 */
3352static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3353{
3354 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3355
3356 /* We should only come here with when there's only one reference to this physical page. */
3357 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3358
3359 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3360 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3361 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3362 if (!fKeptPTEs)
3363 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3364 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3365}
3366
3367
3368/**
3369 * Flushes a list of shadow page tables mapping the same physical page.
3370 *
3371 * @param pVM Pointer to the VM.
3372 * @param pPhysPage The guest page in question.
3373 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3374 * @param iPhysExt The physical cross reference extent list to flush.
3375 */
3376static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3377{
3378 PGM_LOCK_ASSERT_OWNER(pVM);
3379 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3380 bool fKeepList = false;
3381
3382 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3383 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3384
3385 const uint16_t iPhysExtStart = iPhysExt;
3386 PPGMPOOLPHYSEXT pPhysExt;
3387 do
3388 {
3389 Assert(iPhysExt < pPool->cMaxPhysExts);
3390 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3391 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3392 {
3393 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3394 {
3395 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3396 if (!fKeptPTEs)
3397 {
3398 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3399 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3400 }
3401 else
3402 fKeepList = true;
3403 }
3404 }
3405 /* next */
3406 iPhysExt = pPhysExt->iNext;
3407 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3408
3409 if (!fKeepList)
3410 {
3411 /* insert the list into the free list and clear the ram range entry. */
3412 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3413 pPool->iPhysExtFreeHead = iPhysExtStart;
3414 /* Invalidate the tracking data. */
3415 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3416 }
3417
3418 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3419}
3420
3421
3422/**
3423 * Flushes all shadow page table mappings of the given guest page.
3424 *
3425 * This is typically called when the host page backing the guest one has been
3426 * replaced or when the page protection was changed due to a guest access
3427 * caught by the monitoring.
3428 *
3429 * @returns VBox status code.
3430 * @retval VINF_SUCCESS if all references has been successfully cleared.
3431 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3432 * pool cleaning. FF and sync flags are set.
3433 *
3434 * @param pVM Pointer to the VM.
3435 * @param GCPhysPage GC physical address of the page in question
3436 * @param pPhysPage The guest page in question.
3437 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3438 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3439 * flushed, it is NOT touched if this isn't necessary.
3440 * The caller MUST initialized this to @a false.
3441 */
3442int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3443{
3444 PVMCPU pVCpu = VMMGetCpu(pVM);
3445 pgmLock(pVM);
3446 int rc = VINF_SUCCESS;
3447
3448#ifdef PGM_WITH_LARGE_PAGES
3449 /* Is this page part of a large page? */
3450 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3451 {
3452 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3453 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3454
3455 /* Fetch the large page base. */
3456 PPGMPAGE pLargePage;
3457 if (GCPhysBase != GCPhysPage)
3458 {
3459 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3460 AssertFatal(pLargePage);
3461 }
3462 else
3463 pLargePage = pPhysPage;
3464
3465 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3466
3467 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3468 {
3469 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3470 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3471 pVM->pgm.s.cLargePagesDisabled++;
3472
3473 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3474 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3475
3476 *pfFlushTLBs = true;
3477 pgmUnlock(pVM);
3478 return rc;
3479 }
3480 }
3481#else
3482 NOREF(GCPhysPage);
3483#endif /* PGM_WITH_LARGE_PAGES */
3484
3485 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3486 if (u16)
3487 {
3488 /*
3489 * The zero page is currently screwing up the tracking and we'll
3490 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3491 * is defined, zero pages won't normally be mapped. Some kind of solution
3492 * will be needed for this problem of course, but it will have to wait...
3493 */
3494 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3495 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3496 rc = VINF_PGM_GCPHYS_ALIASED;
3497 else
3498 {
3499# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3500 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3501 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3502 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3503# endif
3504
3505 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3506 {
3507 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3508 pgmPoolTrackFlushGCPhysPT(pVM,
3509 pPhysPage,
3510 fFlushPTEs,
3511 PGMPOOL_TD_GET_IDX(u16));
3512 }
3513 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3514 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3515 else
3516 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3517 *pfFlushTLBs = true;
3518
3519# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3520 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3521# endif
3522 }
3523 }
3524
3525 if (rc == VINF_PGM_GCPHYS_ALIASED)
3526 {
3527 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3528 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3529 rc = VINF_PGM_SYNC_CR3;
3530 }
3531 pgmUnlock(pVM);
3532 return rc;
3533}
3534
3535
3536/**
3537 * Scans all shadow page tables for mappings of a physical page.
3538 *
3539 * This may be slow, but it's most likely more efficient than cleaning
3540 * out the entire page pool / cache.
3541 *
3542 * @returns VBox status code.
3543 * @retval VINF_SUCCESS if all references has been successfully cleared.
3544 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3545 * a page pool cleaning.
3546 *
3547 * @param pVM Pointer to the VM.
3548 * @param pPhysPage The guest page in question.
3549 */
3550int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3551{
3552 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3553 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3554 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3555 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3556
3557 /*
3558 * There is a limit to what makes sense.
3559 */
3560 if ( pPool->cPresent > 1024
3561 && pVM->cCpus == 1)
3562 {
3563 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3564 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3565 return VINF_PGM_GCPHYS_ALIASED;
3566 }
3567
3568 /*
3569 * Iterate all the pages until we've encountered all that in use.
3570 * This is simple but not quite optimal solution.
3571 */
3572 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3573 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3574 unsigned cLeft = pPool->cUsedPages;
3575 unsigned iPage = pPool->cCurPages;
3576 while (--iPage >= PGMPOOL_IDX_FIRST)
3577 {
3578 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3579 if ( pPage->GCPhys != NIL_RTGCPHYS
3580 && pPage->cPresent)
3581 {
3582 switch (pPage->enmKind)
3583 {
3584 /*
3585 * We only care about shadow page tables.
3586 */
3587 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3588 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3589 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3590 {
3591 unsigned cPresent = pPage->cPresent;
3592 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3593 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3594 if (pPT->a[i].n.u1Present)
3595 {
3596 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3597 {
3598 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3599 pPT->a[i].u = 0;
3600
3601 /* Update the counter as we're removing references. */
3602 Assert(pPage->cPresent);
3603 Assert(pPool->cPresent);
3604 pPage->cPresent--;
3605 pPool->cPresent--;
3606 }
3607 if (!--cPresent)
3608 break;
3609 }
3610 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3611 break;
3612 }
3613
3614 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3615 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3616 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3617 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3618 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3619 {
3620 unsigned cPresent = pPage->cPresent;
3621 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3622 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3623 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3624 {
3625 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3626 {
3627 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3628 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3629
3630 /* Update the counter as we're removing references. */
3631 Assert(pPage->cPresent);
3632 Assert(pPool->cPresent);
3633 pPage->cPresent--;
3634 pPool->cPresent--;
3635 }
3636 if (!--cPresent)
3637 break;
3638 }
3639 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3640 break;
3641 }
3642#ifndef IN_RC
3643 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3644 {
3645 unsigned cPresent = pPage->cPresent;
3646 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3647 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3648 if (pPT->a[i].n.u1Present)
3649 {
3650 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3651 {
3652 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3653 pPT->a[i].u = 0;
3654
3655 /* Update the counter as we're removing references. */
3656 Assert(pPage->cPresent);
3657 Assert(pPool->cPresent);
3658 pPage->cPresent--;
3659 pPool->cPresent--;
3660 }
3661 if (!--cPresent)
3662 break;
3663 }
3664 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3665 break;
3666 }
3667#endif
3668 }
3669 if (!--cLeft)
3670 break;
3671 }
3672 }
3673
3674 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3675 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3676
3677 /*
3678 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3679 */
3680 if (pPool->cPresent > 1024)
3681 {
3682 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3683 return VINF_PGM_GCPHYS_ALIASED;
3684 }
3685
3686 return VINF_SUCCESS;
3687}
3688
3689
3690/**
3691 * Clears the user entry in a user table.
3692 *
3693 * This is used to remove all references to a page when flushing it.
3694 */
3695static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3696{
3697 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3698 Assert(pUser->iUser < pPool->cCurPages);
3699 uint32_t iUserTable = pUser->iUserTable;
3700
3701 /*
3702 * Map the user page.
3703 */
3704 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3705 union
3706 {
3707 uint64_t *pau64;
3708 uint32_t *pau32;
3709 } u;
3710 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3711
3712 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3713
3714 /* Safety precaution in case we change the paging for other modes too in the future. */
3715 Assert(!pgmPoolIsPageLocked(pPage));
3716
3717#ifdef VBOX_STRICT
3718 /*
3719 * Some sanity checks.
3720 */
3721 switch (pUserPage->enmKind)
3722 {
3723 case PGMPOOLKIND_32BIT_PD:
3724 case PGMPOOLKIND_32BIT_PD_PHYS:
3725 Assert(iUserTable < X86_PG_ENTRIES);
3726 break;
3727 case PGMPOOLKIND_PAE_PDPT:
3728 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3729 case PGMPOOLKIND_PAE_PDPT_PHYS:
3730 Assert(iUserTable < 4);
3731 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3732 break;
3733 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3734 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3735 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3736 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3737 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3738 case PGMPOOLKIND_PAE_PD_PHYS:
3739 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3740 break;
3741 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3742 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3743 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3744 break;
3745 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3746 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3747 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3748 break;
3749 case PGMPOOLKIND_64BIT_PML4:
3750 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3751 /* GCPhys >> PAGE_SHIFT is the index here */
3752 break;
3753 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3754 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3755 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3756 break;
3757
3758 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3759 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3760 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3761 break;
3762
3763 case PGMPOOLKIND_ROOT_NESTED:
3764 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3765 break;
3766
3767 default:
3768 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3769 break;
3770 }
3771#endif /* VBOX_STRICT */
3772
3773 /*
3774 * Clear the entry in the user page.
3775 */
3776 switch (pUserPage->enmKind)
3777 {
3778 /* 32-bit entries */
3779 case PGMPOOLKIND_32BIT_PD:
3780 case PGMPOOLKIND_32BIT_PD_PHYS:
3781 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3782 break;
3783
3784 /* 64-bit entries */
3785 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3786 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3787 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3788 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3789 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3790#ifdef IN_RC
3791 /*
3792 * In 32 bits PAE mode we *must* invalidate the TLB when changing a
3793 * PDPT entry; the CPU fetches them only during cr3 load, so any
3794 * non-present PDPT will continue to cause page faults.
3795 */
3796 ASMReloadCR3();
3797 /* no break */
3798#endif
3799 case PGMPOOLKIND_PAE_PD_PHYS:
3800 case PGMPOOLKIND_PAE_PDPT_PHYS:
3801 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3802 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3803 case PGMPOOLKIND_64BIT_PML4:
3804 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3805 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3806 case PGMPOOLKIND_PAE_PDPT:
3807 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3808 case PGMPOOLKIND_ROOT_NESTED:
3809 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3810 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3811 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3812 break;
3813
3814 default:
3815 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3816 }
3817 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3818}
3819
3820
3821/**
3822 * Clears all users of a page.
3823 */
3824static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3825{
3826 /*
3827 * Free all the user records.
3828 */
3829 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3830
3831 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3832 uint16_t i = pPage->iUserHead;
3833 while (i != NIL_PGMPOOL_USER_INDEX)
3834 {
3835 /* Clear enter in user table. */
3836 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3837
3838 /* Free it. */
3839 const uint16_t iNext = paUsers[i].iNext;
3840 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3841 paUsers[i].iNext = pPool->iUserFreeHead;
3842 pPool->iUserFreeHead = i;
3843
3844 /* Next. */
3845 i = iNext;
3846 }
3847 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3848}
3849
3850
3851/**
3852 * Allocates a new physical cross reference extent.
3853 *
3854 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3855 * @param pVM Pointer to the VM.
3856 * @param piPhysExt Where to store the phys ext index.
3857 */
3858PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3859{
3860 PGM_LOCK_ASSERT_OWNER(pVM);
3861 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3862 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3863 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3864 {
3865 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3866 return NULL;
3867 }
3868 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3869 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3870 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3871 *piPhysExt = iPhysExt;
3872 return pPhysExt;
3873}
3874
3875
3876/**
3877 * Frees a physical cross reference extent.
3878 *
3879 * @param pVM Pointer to the VM.
3880 * @param iPhysExt The extent to free.
3881 */
3882void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3883{
3884 PGM_LOCK_ASSERT_OWNER(pVM);
3885 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3886 Assert(iPhysExt < pPool->cMaxPhysExts);
3887 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3888 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3889 {
3890 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3891 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3892 }
3893 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3894 pPool->iPhysExtFreeHead = iPhysExt;
3895}
3896
3897
3898/**
3899 * Frees a physical cross reference extent.
3900 *
3901 * @param pVM Pointer to the VM.
3902 * @param iPhysExt The extent to free.
3903 */
3904void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3905{
3906 PGM_LOCK_ASSERT_OWNER(pVM);
3907 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3908
3909 const uint16_t iPhysExtStart = iPhysExt;
3910 PPGMPOOLPHYSEXT pPhysExt;
3911 do
3912 {
3913 Assert(iPhysExt < pPool->cMaxPhysExts);
3914 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3915 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3916 {
3917 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3918 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3919 }
3920
3921 /* next */
3922 iPhysExt = pPhysExt->iNext;
3923 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3924
3925 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3926 pPool->iPhysExtFreeHead = iPhysExtStart;
3927}
3928
3929
3930/**
3931 * Insert a reference into a list of physical cross reference extents.
3932 *
3933 * @returns The new tracking data for PGMPAGE.
3934 *
3935 * @param pVM Pointer to the VM.
3936 * @param iPhysExt The physical extent index of the list head.
3937 * @param iShwPT The shadow page table index.
3938 * @param iPte Page table entry
3939 *
3940 */
3941static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3942{
3943 PGM_LOCK_ASSERT_OWNER(pVM);
3944 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3945 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3946
3947 /*
3948 * Special common cases.
3949 */
3950 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3951 {
3952 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3953 paPhysExts[iPhysExt].apte[1] = iPte;
3954 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3955 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3956 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3957 }
3958 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3959 {
3960 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3961 paPhysExts[iPhysExt].apte[2] = iPte;
3962 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3963 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3964 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3965 }
3966 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3967
3968 /*
3969 * General treatment.
3970 */
3971 const uint16_t iPhysExtStart = iPhysExt;
3972 unsigned cMax = 15;
3973 for (;;)
3974 {
3975 Assert(iPhysExt < pPool->cMaxPhysExts);
3976 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3977 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3978 {
3979 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3980 paPhysExts[iPhysExt].apte[i] = iPte;
3981 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3982 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3983 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3984 }
3985 if (!--cMax)
3986 {
3987 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
3988 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3989 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3990 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3991 }
3992
3993 /* advance */
3994 iPhysExt = paPhysExts[iPhysExt].iNext;
3995 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3996 break;
3997 }
3998
3999 /*
4000 * Add another extent to the list.
4001 */
4002 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4003 if (!pNew)
4004 {
4005 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4006 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4007 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4008 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4009 }
4010 pNew->iNext = iPhysExtStart;
4011 pNew->aidx[0] = iShwPT;
4012 pNew->apte[0] = iPte;
4013 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4014 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4015}
4016
4017
4018/**
4019 * Add a reference to guest physical page where extents are in use.
4020 *
4021 * @returns The new tracking data for PGMPAGE.
4022 *
4023 * @param pVM Pointer to the VM.
4024 * @param pPhysPage Pointer to the aPages entry in the ram range.
4025 * @param u16 The ram range flags (top 16-bits).
4026 * @param iShwPT The shadow page table index.
4027 * @param iPte Page table entry
4028 */
4029uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4030{
4031 pgmLock(pVM);
4032 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4033 {
4034 /*
4035 * Convert to extent list.
4036 */
4037 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4038 uint16_t iPhysExt;
4039 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4040 if (pPhysExt)
4041 {
4042 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4043 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4044 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4045 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4046 pPhysExt->aidx[1] = iShwPT;
4047 pPhysExt->apte[1] = iPte;
4048 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4049 }
4050 else
4051 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4052 }
4053 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4054 {
4055 /*
4056 * Insert into the extent list.
4057 */
4058 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4059 }
4060 else
4061 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4062 pgmUnlock(pVM);
4063 return u16;
4064}
4065
4066
4067/**
4068 * Clear references to guest physical memory.
4069 *
4070 * @param pPool The pool.
4071 * @param pPage The page.
4072 * @param pPhysPage Pointer to the aPages entry in the ram range.
4073 * @param iPte Shadow PTE index
4074 */
4075void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4076{
4077 PVM pVM = pPool->CTX_SUFF(pVM);
4078 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4079 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4080
4081 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4082 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4083 {
4084 pgmLock(pVM);
4085
4086 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4087 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4088 do
4089 {
4090 Assert(iPhysExt < pPool->cMaxPhysExts);
4091
4092 /*
4093 * Look for the shadow page and check if it's all freed.
4094 */
4095 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4096 {
4097 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4098 && paPhysExts[iPhysExt].apte[i] == iPte)
4099 {
4100 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4101 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4102
4103 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4104 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4105 {
4106 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4107 pgmUnlock(pVM);
4108 return;
4109 }
4110
4111 /* we can free the node. */
4112 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4113 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4114 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4115 {
4116 /* lonely node */
4117 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4118 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4119 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4120 }
4121 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4122 {
4123 /* head */
4124 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4125 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4126 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4127 }
4128 else
4129 {
4130 /* in list */
4131 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4132 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4133 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4134 }
4135 iPhysExt = iPhysExtNext;
4136 pgmUnlock(pVM);
4137 return;
4138 }
4139 }
4140
4141 /* next */
4142 iPhysExtPrev = iPhysExt;
4143 iPhysExt = paPhysExts[iPhysExt].iNext;
4144 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4145
4146 pgmUnlock(pVM);
4147 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4148 }
4149 else /* nothing to do */
4150 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4151}
4152
4153/**
4154 * Clear references to guest physical memory.
4155 *
4156 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4157 * physical address is assumed to be correct, so the linear search can be
4158 * skipped and we can assert at an earlier point.
4159 *
4160 * @param pPool The pool.
4161 * @param pPage The page.
4162 * @param HCPhys The host physical address corresponding to the guest page.
4163 * @param GCPhys The guest physical address corresponding to HCPhys.
4164 * @param iPte Shadow PTE index
4165 */
4166static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4167{
4168 /*
4169 * Lookup the page and check if it checks out before derefing it.
4170 */
4171 PVM pVM = pPool->CTX_SUFF(pVM);
4172 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4173 if (pPhysPage)
4174 {
4175 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4176#ifdef LOG_ENABLED
4177 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4178 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4179#endif
4180 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4181 {
4182 Assert(pPage->cPresent);
4183 Assert(pPool->cPresent);
4184 pPage->cPresent--;
4185 pPool->cPresent--;
4186 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4187 return;
4188 }
4189
4190 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4191 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4192 }
4193 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4194}
4195
4196
4197/**
4198 * Clear references to guest physical memory.
4199 *
4200 * @param pPool The pool.
4201 * @param pPage The page.
4202 * @param HCPhys The host physical address corresponding to the guest page.
4203 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
4204 * @param iPte Shadow pte index
4205 */
4206void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4207{
4208 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4209
4210 /*
4211 * Try the hint first.
4212 */
4213 RTHCPHYS HCPhysHinted;
4214 PVM pVM = pPool->CTX_SUFF(pVM);
4215 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4216 if (pPhysPage)
4217 {
4218 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4219 Assert(HCPhysHinted);
4220 if (HCPhysHinted == HCPhys)
4221 {
4222 Assert(pPage->cPresent);
4223 Assert(pPool->cPresent);
4224 pPage->cPresent--;
4225 pPool->cPresent--;
4226 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4227 return;
4228 }
4229 }
4230 else
4231 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4232
4233 /*
4234 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4235 */
4236 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4237 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4238 while (pRam)
4239 {
4240 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4241 while (iPage-- > 0)
4242 {
4243 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4244 {
4245 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4246 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4247 Assert(pPage->cPresent);
4248 Assert(pPool->cPresent);
4249 pPage->cPresent--;
4250 pPool->cPresent--;
4251 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4252 return;
4253 }
4254 }
4255 pRam = pRam->CTX_SUFF(pNext);
4256 }
4257
4258 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4259}
4260
4261
4262/**
4263 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4264 *
4265 * @param pPool The pool.
4266 * @param pPage The page.
4267 * @param pShwPT The shadow page table (mapping of the page).
4268 * @param pGstPT The guest page table.
4269 */
4270DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4271{
4272 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4273 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4274 {
4275 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4276 if (pShwPT->a[i].n.u1Present)
4277 {
4278 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4279 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4280 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4281 if (!pPage->cPresent)
4282 break;
4283 }
4284 }
4285}
4286
4287
4288/**
4289 * Clear references to guest physical memory in a PAE / 32-bit page table.
4290 *
4291 * @param pPool The pool.
4292 * @param pPage The page.
4293 * @param pShwPT The shadow page table (mapping of the page).
4294 * @param pGstPT The guest page table (just a half one).
4295 */
4296DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4297{
4298 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4299 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4300 {
4301 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4302 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4303 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4304 {
4305 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4306 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4307 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4308 if (!pPage->cPresent)
4309 break;
4310 }
4311 }
4312}
4313
4314
4315/**
4316 * Clear references to guest physical memory in a PAE / PAE page table.
4317 *
4318 * @param pPool The pool.
4319 * @param pPage The page.
4320 * @param pShwPT The shadow page table (mapping of the page).
4321 * @param pGstPT The guest page table.
4322 */
4323DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4324{
4325 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4326 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4327 {
4328 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4329 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4330 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4331 {
4332 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4333 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4334 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4335 if (!pPage->cPresent)
4336 break;
4337 }
4338 }
4339}
4340
4341
4342/**
4343 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4344 *
4345 * @param pPool The pool.
4346 * @param pPage The page.
4347 * @param pShwPT The shadow page table (mapping of the page).
4348 */
4349DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4350{
4351 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4352 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4353 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4354 {
4355 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4356 if (pShwPT->a[i].n.u1Present)
4357 {
4358 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4359 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4360 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4361 if (!pPage->cPresent)
4362 break;
4363 }
4364 }
4365}
4366
4367
4368/**
4369 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4370 *
4371 * @param pPool The pool.
4372 * @param pPage The page.
4373 * @param pShwPT The shadow page table (mapping of the page).
4374 */
4375DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4376{
4377 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4378 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4379 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4380 {
4381 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4382 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4383 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4384 {
4385 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4386 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4387 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4388 if (!pPage->cPresent)
4389 break;
4390 }
4391 }
4392}
4393
4394
4395/**
4396 * Clear references to shadowed pages in an EPT page table.
4397 *
4398 * @param pPool The pool.
4399 * @param pPage The page.
4400 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4401 */
4402DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4403{
4404 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4405 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4406 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4407 {
4408 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4409 if (pShwPT->a[i].n.u1Present)
4410 {
4411 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4412 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4413 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4414 if (!pPage->cPresent)
4415 break;
4416 }
4417 }
4418}
4419
4420
4421/**
4422 * Clear references to shadowed pages in a 32 bits page directory.
4423 *
4424 * @param pPool The pool.
4425 * @param pPage The page.
4426 * @param pShwPD The shadow page directory (mapping of the page).
4427 */
4428DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4429{
4430 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4431 {
4432 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4433 if ( pShwPD->a[i].n.u1Present
4434 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4435 )
4436 {
4437 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4438 if (pSubPage)
4439 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4440 else
4441 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4442 }
4443 }
4444}
4445
4446
4447/**
4448 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4449 *
4450 * @param pPool The pool.
4451 * @param pPage The page.
4452 * @param pShwPD The shadow page directory (mapping of the page).
4453 */
4454DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4455{
4456 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4457 {
4458 if ( pShwPD->a[i].n.u1Present
4459 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4460 {
4461#ifdef PGM_WITH_LARGE_PAGES
4462 if (pShwPD->a[i].b.u1Size)
4463 {
4464 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4465 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4466 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4467 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4468 i);
4469 }
4470 else
4471#endif
4472 {
4473 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4474 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4475 if (pSubPage)
4476 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4477 else
4478 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4479 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4480 }
4481 }
4482 }
4483}
4484
4485
4486/**
4487 * Clear references to shadowed pages in a PAE page directory pointer table.
4488 *
4489 * @param pPool The pool.
4490 * @param pPage The page.
4491 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4492 */
4493DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4494{
4495 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4496 {
4497 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4498 if ( pShwPDPT->a[i].n.u1Present
4499 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4500 )
4501 {
4502 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4503 if (pSubPage)
4504 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4505 else
4506 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4507 }
4508 }
4509}
4510
4511
4512/**
4513 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4514 *
4515 * @param pPool The pool.
4516 * @param pPage The page.
4517 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4518 */
4519DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4520{
4521 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4522 {
4523 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4524 if (pShwPDPT->a[i].n.u1Present)
4525 {
4526 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4527 if (pSubPage)
4528 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4529 else
4530 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4531 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4532 }
4533 }
4534}
4535
4536
4537/**
4538 * Clear references to shadowed pages in a 64-bit level 4 page table.
4539 *
4540 * @param pPool The pool.
4541 * @param pPage The page.
4542 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4543 */
4544DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4545{
4546 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4547 {
4548 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4549 if (pShwPML4->a[i].n.u1Present)
4550 {
4551 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4552 if (pSubPage)
4553 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4554 else
4555 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4556 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4557 }
4558 }
4559}
4560
4561
4562/**
4563 * Clear references to shadowed pages in an EPT page directory.
4564 *
4565 * @param pPool The pool.
4566 * @param pPage The page.
4567 * @param pShwPD The shadow page directory (mapping of the page).
4568 */
4569DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4570{
4571 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4572 {
4573 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4574 if (pShwPD->a[i].n.u1Present)
4575 {
4576#ifdef PGM_WITH_LARGE_PAGES
4577 if (pShwPD->a[i].b.u1Size)
4578 {
4579 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4580 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4581 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4582 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4583 i);
4584 }
4585 else
4586#endif
4587 {
4588 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4589 if (pSubPage)
4590 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4591 else
4592 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4593 }
4594 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4595 }
4596 }
4597}
4598
4599
4600/**
4601 * Clear references to shadowed pages in an EPT page directory pointer table.
4602 *
4603 * @param pPool The pool.
4604 * @param pPage The page.
4605 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4606 */
4607DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4608{
4609 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4610 {
4611 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4612 if (pShwPDPT->a[i].n.u1Present)
4613 {
4614 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4615 if (pSubPage)
4616 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4617 else
4618 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4619 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4620 }
4621 }
4622}
4623
4624
4625/**
4626 * Clears all references made by this page.
4627 *
4628 * This includes other shadow pages and GC physical addresses.
4629 *
4630 * @param pPool The pool.
4631 * @param pPage The page.
4632 */
4633static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4634{
4635 /*
4636 * Map the shadow page and take action according to the page kind.
4637 */
4638 PVM pVM = pPool->CTX_SUFF(pVM);
4639 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4640 switch (pPage->enmKind)
4641 {
4642 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4643 {
4644 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4645 void *pvGst;
4646 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4647 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4648 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4649 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4650 break;
4651 }
4652
4653 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4654 {
4655 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4656 void *pvGst;
4657 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4658 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4659 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4660 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4661 break;
4662 }
4663
4664 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4665 {
4666 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4667 void *pvGst;
4668 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4669 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4670 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4671 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4672 break;
4673 }
4674
4675 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4676 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4677 {
4678 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4679 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4680 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4681 break;
4682 }
4683
4684 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4685 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4686 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4687 {
4688 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4689 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4690 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4691 break;
4692 }
4693
4694 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4695 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4696 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4697 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4698 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4699 case PGMPOOLKIND_PAE_PD_PHYS:
4700 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4701 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4702 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4703 break;
4704
4705 case PGMPOOLKIND_32BIT_PD_PHYS:
4706 case PGMPOOLKIND_32BIT_PD:
4707 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4708 break;
4709
4710 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4711 case PGMPOOLKIND_PAE_PDPT:
4712 case PGMPOOLKIND_PAE_PDPT_PHYS:
4713 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4714 break;
4715
4716 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4717 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4718 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4719 break;
4720
4721 case PGMPOOLKIND_64BIT_PML4:
4722 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4723 break;
4724
4725 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4726 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4727 break;
4728
4729 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4730 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4731 break;
4732
4733 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4734 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4735 break;
4736
4737 default:
4738 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4739 }
4740
4741 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
4742 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4743 ASMMemZeroPage(pvShw);
4744 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4745 pPage->fZeroed = true;
4746 Assert(!pPage->cPresent);
4747 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4748}
4749
4750
4751/**
4752 * Flushes a pool page.
4753 *
4754 * This moves the page to the free list after removing all user references to it.
4755 *
4756 * @returns VBox status code.
4757 * @retval VINF_SUCCESS on success.
4758 * @param pPool The pool.
4759 * @param HCPhys The HC physical address of the shadow page.
4760 * @param fFlush Flush the TLBS when required (should only be false in very specific use cases!!)
4761 */
4762int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4763{
4764 PVM pVM = pPool->CTX_SUFF(pVM);
4765 bool fFlushRequired = false;
4766
4767 int rc = VINF_SUCCESS;
4768 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4769 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4770 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4771
4772 /*
4773 * Quietly reject any attempts at flushing any of the special root pages.
4774 */
4775 if (pPage->idx < PGMPOOL_IDX_FIRST)
4776 {
4777 AssertFailed(); /* can no longer happen */
4778 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4779 return VINF_SUCCESS;
4780 }
4781
4782 pgmLock(pVM);
4783
4784 /*
4785 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4786 */
4787 if (pgmPoolIsPageLocked(pPage))
4788 {
4789 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4790 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4791 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4792 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4793 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4794 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4795 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4796 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4797 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4798 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4799 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4800 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4801 pgmUnlock(pVM);
4802 return VINF_SUCCESS;
4803 }
4804
4805#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4806 /* Start a subset so we won't run out of mapping space. */
4807 PVMCPU pVCpu = VMMGetCpu(pVM);
4808 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4809#endif
4810
4811 /*
4812 * Mark the page as being in need of an ASMMemZeroPage().
4813 */
4814 pPage->fZeroed = false;
4815
4816#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4817 if (pPage->fDirty)
4818 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4819#endif
4820
4821 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4822 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4823 fFlushRequired = true;
4824
4825 /*
4826 * Clear the page.
4827 */
4828 pgmPoolTrackClearPageUsers(pPool, pPage);
4829 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4830 pgmPoolTrackDeref(pPool, pPage);
4831 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4832
4833 /*
4834 * Flush it from the cache.
4835 */
4836 pgmPoolCacheFlushPage(pPool, pPage);
4837
4838#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4839 /* Heavy stuff done. */
4840 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4841#endif
4842
4843 /*
4844 * Deregistering the monitoring.
4845 */
4846 if (pPage->fMonitored)
4847 rc = pgmPoolMonitorFlush(pPool, pPage);
4848
4849 /*
4850 * Free the page.
4851 */
4852 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4853 pPage->iNext = pPool->iFreeHead;
4854 pPool->iFreeHead = pPage->idx;
4855 pPage->enmKind = PGMPOOLKIND_FREE;
4856 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4857 pPage->GCPhys = NIL_RTGCPHYS;
4858 pPage->fReusedFlushPending = false;
4859
4860 pPool->cUsedPages--;
4861
4862 /* Flush the TLBs of all VCPUs if required. */
4863 if ( fFlushRequired
4864 && fFlush)
4865 {
4866 PGM_INVL_ALL_VCPU_TLBS(pVM);
4867 }
4868
4869 pgmUnlock(pVM);
4870 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4871 return rc;
4872}
4873
4874
4875/**
4876 * Frees a usage of a pool page.
4877 *
4878 * The caller is responsible to updating the user table so that it no longer
4879 * references the shadow page.
4880 *
4881 * @param pPool The pool.
4882 * @param HCPhys The HC physical address of the shadow page.
4883 * @param iUser The shadow page pool index of the user table.
4884 * @param iUserTable The index into the user table (shadowed).
4885 */
4886void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4887{
4888 PVM pVM = pPool->CTX_SUFF(pVM);
4889
4890 STAM_PROFILE_START(&pPool->StatFree, a);
4891 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4892 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4893 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4894 pgmLock(pVM);
4895 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4896 if (!pPage->fCached)
4897 pgmPoolFlushPage(pPool, pPage);
4898 pgmUnlock(pVM);
4899 STAM_PROFILE_STOP(&pPool->StatFree, a);
4900}
4901
4902
4903/**
4904 * Makes one or more free page free.
4905 *
4906 * @returns VBox status code.
4907 * @retval VINF_SUCCESS on success.
4908 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4909 *
4910 * @param pPool The pool.
4911 * @param enmKind Page table kind
4912 * @param iUser The user of the page.
4913 */
4914static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4915{
4916 PVM pVM = pPool->CTX_SUFF(pVM);
4917 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%d\n", iUser));
4918 NOREF(enmKind);
4919
4920 /*
4921 * If the pool isn't full grown yet, expand it.
4922 */
4923 if ( pPool->cCurPages < pPool->cMaxPages
4924#if defined(IN_RC)
4925 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4926 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4927 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4928#endif
4929 )
4930 {
4931 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4932#ifdef IN_RING3
4933 int rc = PGMR3PoolGrow(pVM);
4934#else
4935 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4936#endif
4937 if (RT_FAILURE(rc))
4938 return rc;
4939 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4940 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4941 return VINF_SUCCESS;
4942 }
4943
4944 /*
4945 * Free one cached page.
4946 */
4947 return pgmPoolCacheFreeOne(pPool, iUser);
4948}
4949
4950
4951/**
4952 * Allocates a page from the pool.
4953 *
4954 * This page may actually be a cached page and not in need of any processing
4955 * on the callers part.
4956 *
4957 * @returns VBox status code.
4958 * @retval VINF_SUCCESS if a NEW page was allocated.
4959 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4960 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4961 *
4962 * @param pVM Pointer to the VM.
4963 * @param GCPhys The GC physical address of the page we're gonna shadow.
4964 * For 4MB and 2MB PD entries, it's the first address the
4965 * shadow PT is covering.
4966 * @param enmKind The kind of mapping.
4967 * @param enmAccess Access type for the mapping (only relevant for big pages)
4968 * @param fA20Enabled Whether the A20 gate is enabled or not.
4969 * @param iUser The shadow page pool index of the user table.
4970 * @param iUserTable The index into the user table (shadowed).
4971 * @param fLockPage Lock the page
4972 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4973 */
4974int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
4975 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
4976{
4977 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4978 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4979 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4980 *ppPage = NULL;
4981 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4982 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4983 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4984
4985 pgmLock(pVM);
4986
4987 if (pPool->fCacheEnabled)
4988 {
4989 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
4990 if (RT_SUCCESS(rc2))
4991 {
4992 if (fLockPage)
4993 pgmPoolLockPage(pPool, *ppPage);
4994 pgmUnlock(pVM);
4995 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4996 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4997 return rc2;
4998 }
4999 }
5000
5001 /*
5002 * Allocate a new one.
5003 */
5004 int rc = VINF_SUCCESS;
5005 uint16_t iNew = pPool->iFreeHead;
5006 if (iNew == NIL_PGMPOOL_IDX)
5007 {
5008 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5009 if (RT_FAILURE(rc))
5010 {
5011 pgmUnlock(pVM);
5012 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5013 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5014 return rc;
5015 }
5016 iNew = pPool->iFreeHead;
5017 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5018 }
5019
5020 /* unlink the free head */
5021 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5022 pPool->iFreeHead = pPage->iNext;
5023 pPage->iNext = NIL_PGMPOOL_IDX;
5024
5025 /*
5026 * Initialize it.
5027 */
5028 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5029 pPage->enmKind = enmKind;
5030 pPage->enmAccess = enmAccess;
5031 pPage->GCPhys = GCPhys;
5032 pPage->fA20Enabled = fA20Enabled;
5033 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5034 pPage->fMonitored = false;
5035 pPage->fCached = false;
5036 pPage->fDirty = false;
5037 pPage->fReusedFlushPending = false;
5038 pPage->cModifications = 0;
5039 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5040 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5041 pPage->cPresent = 0;
5042 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5043 pPage->idxDirtyEntry = 0;
5044 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5045 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5046 pPage->cLastAccessHandler = 0;
5047 pPage->cLocked = 0;
5048# ifdef VBOX_STRICT
5049 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5050# endif
5051
5052 /*
5053 * Insert into the tracking and cache. If this fails, free the page.
5054 */
5055 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5056 if (RT_FAILURE(rc3))
5057 {
5058 pPool->cUsedPages--;
5059 pPage->enmKind = PGMPOOLKIND_FREE;
5060 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5061 pPage->GCPhys = NIL_RTGCPHYS;
5062 pPage->iNext = pPool->iFreeHead;
5063 pPool->iFreeHead = pPage->idx;
5064 pgmUnlock(pVM);
5065 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5066 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5067 return rc3;
5068 }
5069
5070 /*
5071 * Commit the allocation, clear the page and return.
5072 */
5073#ifdef VBOX_WITH_STATISTICS
5074 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5075 pPool->cUsedPagesHigh = pPool->cUsedPages;
5076#endif
5077
5078 if (!pPage->fZeroed)
5079 {
5080 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5081 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5082 ASMMemZeroPage(pv);
5083 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5084 }
5085
5086 *ppPage = pPage;
5087 if (fLockPage)
5088 pgmPoolLockPage(pPool, pPage);
5089 pgmUnlock(pVM);
5090 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5091 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5092 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5093 return rc;
5094}
5095
5096
5097/**
5098 * Frees a usage of a pool page.
5099 *
5100 * @param pVM Pointer to the VM.
5101 * @param HCPhys The HC physical address of the shadow page.
5102 * @param iUser The shadow page pool index of the user table.
5103 * @param iUserTable The index into the user table (shadowed).
5104 */
5105void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5106{
5107 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5108 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5109 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5110}
5111
5112
5113/**
5114 * Internal worker for finding a 'in-use' shadow page give by it's physical address.
5115 *
5116 * @returns Pointer to the shadow page structure.
5117 * @param pPool The pool.
5118 * @param HCPhys The HC physical address of the shadow page.
5119 */
5120PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5121{
5122 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5123
5124 /*
5125 * Look up the page.
5126 */
5127 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5128
5129 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5130 return pPage;
5131}
5132
5133
5134/**
5135 * Internal worker for finding a page for debugging purposes, no assertions.
5136 *
5137 * @returns Pointer to the shadow page structure. NULL on if not found.
5138 * @param pPool The pool.
5139 * @param HCPhys The HC physical address of the shadow page.
5140 */
5141PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5142{
5143 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5144 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5145}
5146
5147#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5148
5149/**
5150 * Flush the specified page if present
5151 *
5152 * @param pVM Pointer to the VM.
5153 * @param GCPhys Guest physical address of the page to flush
5154 */
5155void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5156{
5157 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5158
5159 VM_ASSERT_EMT(pVM);
5160
5161 /*
5162 * Look up the GCPhys in the hash.
5163 */
5164 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5165 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5166 if (i == NIL_PGMPOOL_IDX)
5167 return;
5168
5169 do
5170 {
5171 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5172 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5173 {
5174 switch (pPage->enmKind)
5175 {
5176 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5177 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5178 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5179 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5180 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5181 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5182 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5183 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5184 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5185 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5186 case PGMPOOLKIND_64BIT_PML4:
5187 case PGMPOOLKIND_32BIT_PD:
5188 case PGMPOOLKIND_PAE_PDPT:
5189 {
5190 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5191#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5192 if (pPage->fDirty)
5193 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5194 else
5195#endif
5196 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5197 Assert(!pgmPoolIsPageLocked(pPage));
5198 pgmPoolMonitorChainFlush(pPool, pPage);
5199 return;
5200 }
5201
5202 /* ignore, no monitoring. */
5203 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5204 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5205 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5206 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5207 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5208 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5209 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5210 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5211 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5212 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5213 case PGMPOOLKIND_ROOT_NESTED:
5214 case PGMPOOLKIND_PAE_PD_PHYS:
5215 case PGMPOOLKIND_PAE_PDPT_PHYS:
5216 case PGMPOOLKIND_32BIT_PD_PHYS:
5217 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5218 break;
5219
5220 default:
5221 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5222 }
5223 }
5224
5225 /* next */
5226 i = pPage->iNext;
5227 } while (i != NIL_PGMPOOL_IDX);
5228 return;
5229}
5230
5231#endif /* IN_RING3 */
5232#ifdef IN_RING3
5233
5234/**
5235 * Reset CPU on hot plugging.
5236 *
5237 * @param pVM Pointer to the VM.
5238 * @param pVCpu The virtual CPU.
5239 */
5240void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5241{
5242 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5243
5244 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5245 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5246 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5247}
5248
5249
5250/**
5251 * Flushes the entire cache.
5252 *
5253 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5254 * this and execute this CR3 flush.
5255 *
5256 * @param pPool The pool.
5257 */
5258void pgmR3PoolReset(PVM pVM)
5259{
5260 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5261
5262 PGM_LOCK_ASSERT_OWNER(pVM);
5263 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5264 LogFlow(("pgmR3PoolReset:\n"));
5265
5266 /*
5267 * If there are no pages in the pool, there is nothing to do.
5268 */
5269 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5270 {
5271 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5272 return;
5273 }
5274
5275 /*
5276 * Exit the shadow mode since we're going to clear everything,
5277 * including the root page.
5278 */
5279 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5280 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5281
5282 /*
5283 * Nuke the free list and reinsert all pages into it.
5284 */
5285 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5286 {
5287 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5288
5289 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5290 if (pPage->fMonitored)
5291 pgmPoolMonitorFlush(pPool, pPage);
5292 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5293 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5294 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5295 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5296 pPage->cModifications = 0;
5297 pPage->GCPhys = NIL_RTGCPHYS;
5298 pPage->enmKind = PGMPOOLKIND_FREE;
5299 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5300 Assert(pPage->idx == i);
5301 pPage->iNext = i + 1;
5302 pPage->fA20Enabled = true;
5303 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5304 pPage->fSeenNonGlobal = false;
5305 pPage->fMonitored = false;
5306 pPage->fDirty = false;
5307 pPage->fCached = false;
5308 pPage->fReusedFlushPending = false;
5309 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5310 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5311 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5312 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5313 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5314 pPage->cLastAccessHandler = 0;
5315 pPage->cLocked = 0;
5316#ifdef VBOX_STRICT
5317 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5318#endif
5319 }
5320 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5321 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5322 pPool->cUsedPages = 0;
5323
5324 /*
5325 * Zap and reinitialize the user records.
5326 */
5327 pPool->cPresent = 0;
5328 pPool->iUserFreeHead = 0;
5329 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5330 const unsigned cMaxUsers = pPool->cMaxUsers;
5331 for (unsigned i = 0; i < cMaxUsers; i++)
5332 {
5333 paUsers[i].iNext = i + 1;
5334 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5335 paUsers[i].iUserTable = 0xfffffffe;
5336 }
5337 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5338
5339 /*
5340 * Clear all the GCPhys links and rebuild the phys ext free list.
5341 */
5342 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5343 pRam;
5344 pRam = pRam->CTX_SUFF(pNext))
5345 {
5346 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5347 while (iPage-- > 0)
5348 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5349 }
5350
5351 pPool->iPhysExtFreeHead = 0;
5352 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5353 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5354 for (unsigned i = 0; i < cMaxPhysExts; i++)
5355 {
5356 paPhysExts[i].iNext = i + 1;
5357 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5358 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5359 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5360 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5361 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5362 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5363 }
5364 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5365
5366 /*
5367 * Just zap the modified list.
5368 */
5369 pPool->cModifiedPages = 0;
5370 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5371
5372 /*
5373 * Clear the GCPhys hash and the age list.
5374 */
5375 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5376 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5377 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5378 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5379
5380#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5381 /* Clear all dirty pages. */
5382 pPool->idxFreeDirtyPage = 0;
5383 pPool->cDirtyPages = 0;
5384 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5385 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5386#endif
5387
5388 /*
5389 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5390 */
5391 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5392 {
5393 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5394 pPage->iNext = NIL_PGMPOOL_IDX;
5395 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5396 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5397 pPage->cModifications = 0;
5398 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5399 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5400 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5401 if (pPage->fMonitored)
5402 {
5403 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
5404 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5405 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5406 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5407 pPool->pszAccessHandler);
5408 AssertFatalRCSuccess(rc);
5409 pgmPoolHashInsert(pPool, pPage);
5410 }
5411 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5412 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5413 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5414 }
5415
5416 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5417 {
5418 /*
5419 * Re-enter the shadowing mode and assert Sync CR3 FF.
5420 */
5421 PVMCPU pVCpu = &pVM->aCpus[i];
5422 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5423 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5424 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5425 }
5426
5427 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5428}
5429
5430#endif /* IN_RING3 */
5431
5432#ifdef LOG_ENABLED
5433/**
5434 * Stringifies a PGMPOOLKIND value.
5435 */
5436static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5437{
5438 switch ((PGMPOOLKIND)enmKind)
5439 {
5440 case PGMPOOLKIND_INVALID:
5441 return "PGMPOOLKIND_INVALID";
5442 case PGMPOOLKIND_FREE:
5443 return "PGMPOOLKIND_FREE";
5444 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5445 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5446 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5447 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5448 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5449 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5450 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5451 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5452 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5453 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5454 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5455 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5456 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5457 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5458 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5459 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5460 case PGMPOOLKIND_32BIT_PD:
5461 return "PGMPOOLKIND_32BIT_PD";
5462 case PGMPOOLKIND_32BIT_PD_PHYS:
5463 return "PGMPOOLKIND_32BIT_PD_PHYS";
5464 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5465 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5466 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5467 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5468 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5469 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5470 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5471 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5472 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5473 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5474 case PGMPOOLKIND_PAE_PD_PHYS:
5475 return "PGMPOOLKIND_PAE_PD_PHYS";
5476 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5477 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5478 case PGMPOOLKIND_PAE_PDPT:
5479 return "PGMPOOLKIND_PAE_PDPT";
5480 case PGMPOOLKIND_PAE_PDPT_PHYS:
5481 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5482 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5483 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5484 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5485 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5486 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5487 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5488 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5489 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5490 case PGMPOOLKIND_64BIT_PML4:
5491 return "PGMPOOLKIND_64BIT_PML4";
5492 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5493 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5494 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5495 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5496 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5497 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5498 case PGMPOOLKIND_ROOT_NESTED:
5499 return "PGMPOOLKIND_ROOT_NESTED";
5500 }
5501 return "Unknown kind!";
5502}
5503#endif /* LOG_ENABLED*/
5504
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette