VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@55972

Last change on this file since 55972 was 55966, checked in by vboxsync on 2015-05-20

PGM,++: VBOXSTRICTRC for physical access handlers.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 213.9 KB
1/* $Id: PGMAllPool.cpp 55966 2015-05-20 12:42:53Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hm_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
54static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
55#endif
56#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
57static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
58#endif
59
60int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
61PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
62void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
63void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
64
65RT_C_DECLS_END
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
88/**
89 * Flushes a chain of pages sharing the same access monitor.
90 *
91 * @returns VBox status code suitable for scheduling.
92 * @param pPool The pool.
93 * @param pPage A page in the chain.
94 * @todo VBOXSTRICTRC
95 */
96int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
97{
98 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
99
100 /*
101 * Find the list head.
102 */
103 uint16_t idx = pPage->idx;
104 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
105 {
106 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 idx = pPage->iMonitoredPrev;
109 Assert(idx != pPage->idx);
110 pPage = &pPool->aPages[idx];
111 }
112 }
113
114 /*
115 * Iterate the list flushing each shadow page.
116 */
117 int rc = VINF_SUCCESS;
118 for (;;)
119 {
120 idx = pPage->iMonitoredNext;
121 Assert(idx != pPage->idx);
122 if (pPage->idx >= PGMPOOL_IDX_FIRST)
123 {
124 int rc2 = pgmPoolFlushPage(pPool, pPage);
125 AssertRC(rc2);
126 }
127 /* next */
128 if (idx == NIL_PGMPOOL_IDX)
129 break;
130 pPage = &pPool->aPages[idx];
131 }
132 return rc;
133}
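/* Note: the whole monitor chain is flushed, starting from the list head located above; as
   the @todo hints, rc currently stays VINF_SUCCESS because pgmPoolFlushPage failures are
   only asserted on. */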
134
135
136/**
137 * Wrapper for reading the guest entry being modified, using the current context mapping when available.
138 *
139 * @returns VBox status code suitable for scheduling.
140 * @param pVM Pointer to the VM.
141 * @param pvDst Destination address
142 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
143 * on the context (e.g. \#PF in R0 & RC).
144 * @param GCPhysSrc The source guest physical address.
145 * @param cb Size of data to read
146 */
147DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
148{
149#if defined(IN_RING3)
150 NOREF(pVM); NOREF(GCPhysSrc);
151 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
152 return VINF_SUCCESS;
153#else
154 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
155 NOREF(pvSrc);
156 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
157#endif
158}
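/* The ~(cb - 1) masking above rounds the source down to the natural alignment of the entry
   being read, so the access never straddles it; e.g. (illustrative) a 4-byte guest write
   hitting offset 0x..4 of an 8-byte PAE PTE still fetches the complete entry from 0x..0. */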
159
160
161/**
162 * Process shadow entries before they are changed by the guest.
163 *
164 * For PT entries we will clear them. For PD entries, we'll simply check
165 * for mapping conflicts and set the SyncCR3 FF if found.
166 *
167 * @param pVCpu Pointer to the VMCPU.
168 * @param pPool The pool.
169 * @param pPage The head page.
170 * @param GCPhysFault The guest physical fault address.
171 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
172 * depending on the context (e.g. \#PF in R0 & RC).
173 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
174 */
175static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
176 void const *pvAddress, unsigned cbWrite)
177{
178 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
179 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
180 PVM pVM = pPool->CTX_SUFF(pVM);
181 NOREF(pVCpu);
182
183 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
184
185 for (;;)
186 {
187 union
188 {
189 void *pv;
190 PX86PT pPT;
191 PPGMSHWPTPAE pPTPae;
192 PX86PD pPD;
193 PX86PDPAE pPDPae;
194 PX86PDPT pPDPT;
195 PX86PML4 pPML4;
196 } uShw;
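        /* One pointer, many views: the switch below casts the shadow page to whichever
           table layout matches pPage->enmKind and only touches that union member. */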
197
198 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
199
200 uShw.pv = NULL;
201 switch (pPage->enmKind)
202 {
203 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
204 {
205 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
206 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
207 const unsigned iShw = off / sizeof(X86PTE);
208 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
209 if (uShw.pPT->a[iShw].n.u1Present)
210 {
211 X86PTE GstPte;
212
213 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
214 AssertRC(rc);
215 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
216 pgmPoolTracDerefGCPhysHint(pPool, pPage,
217 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
218 GstPte.u & X86_PTE_PG_MASK,
219 iShw);
220 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
221 }
222 break;
223 }
224
225 /* page/2 sized */
226 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
227 {
228 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
229 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
230 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
231 {
232 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
233 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
234 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
235 {
236 X86PTE GstPte;
237 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
238 AssertRC(rc);
239
240 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PG_MASK));
241 pgmPoolTracDerefGCPhysHint(pPool, pPage,
242 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
243 GstPte.u & X86_PTE_PG_MASK,
244 iShw);
245 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
246 }
247 }
248 break;
249 }
250
251 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
252 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
253 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
254 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
255 {
256 unsigned iGst = off / sizeof(X86PDE);
257 unsigned iShwPdpt = iGst / 256;
258 unsigned iShw = (iGst % 256) * 2;
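            /* Index math: a 32-bit guest PD has 1024 entries covering 4MB each, while it is
               shadowed by four PAE PDs of 512 entries covering 2MB each.  iGst / 256 selects
               the affected shadow PD (PD0..PD3) and every guest PDE maps to a pair of shadow
               PDEs, hence the factor of two in iShw. */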
259 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
260
261 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
262 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
263 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
264 {
265 for (unsigned i = 0; i < 2; i++)
266 {
267# ifdef VBOX_WITH_RAW_MODE_NOT_R0
268 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
269 {
270 Assert(pgmMapAreMappingsEnabled(pVM));
271 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
272 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
273 break;
274 }
275# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
276 if (uShw.pPDPae->a[iShw+i].n.u1Present)
277 {
278 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
279 pgmPoolFree(pVM,
280 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
281 pPage->idx,
282 iShw + i);
283 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
284 }
285
286 /* paranoia / a bit assumptive. */
287 if ( (off & 3)
288 && (off & 3) + cbWrite > 4)
289 {
290 const unsigned iShw2 = iShw + 2 + i;
291 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
292 {
293# ifdef VBOX_WITH_RAW_MODE_NOT_R0
294 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
295 {
296 Assert(pgmMapAreMappingsEnabled(pVM));
297 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
298 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
299 break;
300 }
301# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
302 if (uShw.pPDPae->a[iShw2].n.u1Present)
303 {
304 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
305 pgmPoolFree(pVM,
306 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
307 pPage->idx,
308 iShw2);
309 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
310 }
311 }
312 }
313 }
314 }
315 break;
316 }
317
318 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
319 {
320 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
321 const unsigned iShw = off / sizeof(X86PTEPAE);
322 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
323 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
324 {
325 X86PTEPAE GstPte;
326 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
327 AssertRC(rc);
328
329 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
330 pgmPoolTracDerefGCPhysHint(pPool, pPage,
331 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
332 GstPte.u & X86_PTE_PAE_PG_MASK,
333 iShw);
334 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
335 }
336
337 /* paranoia / a bit assumptive. */
338 if ( (off & 7)
339 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
340 {
341 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
342 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
343
344 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
345 {
346 X86PTEPAE GstPte;
347 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
348 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
349 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
350 AssertRC(rc);
351 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
352 pgmPoolTracDerefGCPhysHint(pPool, pPage,
353 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
354 GstPte.u & X86_PTE_PAE_PG_MASK,
355 iShw2);
356 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
357 }
358 }
359 break;
360 }
361
362 case PGMPOOLKIND_32BIT_PD:
363 {
364 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
365 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
366
367 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
368 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
369# ifdef VBOX_WITH_RAW_MODE_NOT_R0
370 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
371 {
372 Assert(pgmMapAreMappingsEnabled(pVM));
373 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
374 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
375 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
376 break;
377 }
378 else
379# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
380 {
381 if (uShw.pPD->a[iShw].n.u1Present)
382 {
383 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
384 pgmPoolFree(pVM,
385 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
386 pPage->idx,
387 iShw);
388 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
389 }
390 }
391 /* paranoia / a bit assumptive. */
392 if ( (off & 3)
393 && (off & 3) + cbWrite > sizeof(X86PTE))
394 {
395 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
396 if ( iShw2 != iShw
397 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
398 {
399# ifdef VBOX_WITH_RAW_MODE_NOT_R0
400 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
401 {
402 Assert(pgmMapAreMappingsEnabled(pVM));
403 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
404 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
405 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
406 break;
407 }
408# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
409 if (uShw.pPD->a[iShw2].n.u1Present)
410 {
411 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
412 pgmPoolFree(pVM,
413 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
414 pPage->idx,
415 iShw2);
416 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
417 }
418 }
419 }
420#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
421 if ( uShw.pPD->a[iShw].n.u1Present
422 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
423 {
424 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
425# ifdef IN_RC /* TLB load - we're pushing things a bit... */
426 ASMProbeReadByte(pvAddress);
427# endif
428 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
429 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
430 }
431#endif
432 break;
433 }
434
435 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
436 {
437 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
438 const unsigned iShw = off / sizeof(X86PDEPAE);
439 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
440#ifdef VBOX_WITH_RAW_MODE_NOT_R0
441 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
442 {
443 Assert(pgmMapAreMappingsEnabled(pVM));
444 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
445 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
446 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
447 break;
448 }
449#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
450 /*
451 * Causes trouble when the guest uses a PDE to refer to the whole page table level
452 * structure. (Invalidate here; faults later on when it tries to change the page
453 * table entries -> recheck; probably only applies to the RC case.)
454 */
455#ifdef VBOX_WITH_RAW_MODE_NOT_R0
456 else
457#endif
458 {
459 if (uShw.pPDPae->a[iShw].n.u1Present)
460 {
461 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
462 pgmPoolFree(pVM,
463 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
464 pPage->idx,
465 iShw);
466 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
467 }
468 }
469 /* paranoia / a bit assumptive. */
470 if ( (off & 7)
471 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
472 {
473 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
474 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
475
476#ifdef VBOX_WITH_RAW_MODE_NOT_R0
477 if ( iShw2 != iShw
478 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
479 {
480 Assert(pgmMapAreMappingsEnabled(pVM));
481 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
482 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
483 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
484 break;
485 }
486 else
487#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
488 if (uShw.pPDPae->a[iShw2].n.u1Present)
489 {
490 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
491 pgmPoolFree(pVM,
492 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
493 pPage->idx,
494 iShw2);
495 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
496 }
497 }
498 break;
499 }
500
501 case PGMPOOLKIND_PAE_PDPT:
502 {
503 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
504 /*
505 * Hopefully this doesn't happen very often:
506 * - touching unused parts of the page
507 * - messing with the bits of pd pointers without changing the physical address
508 */
509 /* PDPT roots are not page aligned; 32 byte only! */
510 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
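            /* The offset is taken relative to pPage->GCPhys rather than the page offset
               because the PAE PDPT is only 4 entries (32 bytes) and need not be page
               aligned; the iShw bound check below rejects writes beyond those entries. */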
511
512 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
513 const unsigned iShw = offPdpt / sizeof(X86PDPE);
514 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
515 {
516# ifdef VBOX_WITH_RAW_MODE_NOT_R0
517 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
518 {
519 Assert(pgmMapAreMappingsEnabled(pVM));
520 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
521 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
522 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
523 break;
524 }
525 else
526# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
527 if (uShw.pPDPT->a[iShw].n.u1Present)
528 {
529 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
530 pgmPoolFree(pVM,
531 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
532 pPage->idx,
533 iShw);
534 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
535 }
536
537 /* paranoia / a bit assumptive. */
538 if ( (offPdpt & 7)
539 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
540 {
541 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
542 if ( iShw2 != iShw
543 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
544 {
545# ifdef VBOX_WITH_RAW_MODE_NOT_R0
546 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
547 {
548 Assert(pgmMapAreMappingsEnabled(pVM));
549 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
550 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
551 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
552 break;
553 }
554 else
555# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
556 if (uShw.pPDPT->a[iShw2].n.u1Present)
557 {
558 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
559 pgmPoolFree(pVM,
560 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
561 pPage->idx,
562 iShw2);
563 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
564 }
565 }
566 }
567 }
568 break;
569 }
570
571#ifndef IN_RC
572 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
573 {
574 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
575 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
576 const unsigned iShw = off / sizeof(X86PDEPAE);
577 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
578 if (uShw.pPDPae->a[iShw].n.u1Present)
579 {
580 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
581 pgmPoolFree(pVM,
582 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
583 pPage->idx,
584 iShw);
585 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
586 }
587 /* paranoia / a bit assumptive. */
588 if ( (off & 7)
589 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
590 {
591 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
592 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
593
594 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
595 if (uShw.pPDPae->a[iShw2].n.u1Present)
596 {
597 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
598 pgmPoolFree(pVM,
599 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
600 pPage->idx,
601 iShw2);
602 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
603 }
604 }
605 break;
606 }
607
608 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
609 {
610 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
611 /*
612 * Hopefully this doesn't happen very often:
613 * - messing with the bits of pd pointers without changing the physical address
614 */
615 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
616 const unsigned iShw = off / sizeof(X86PDPE);
617 if (uShw.pPDPT->a[iShw].n.u1Present)
618 {
619 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
620 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
621 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
622 }
623 /* paranoia / a bit assumptive. */
624 if ( (off & 7)
625 && (off & 7) + cbWrite > sizeof(X86PDPE))
626 {
627 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
628 if (uShw.pPDPT->a[iShw2].n.u1Present)
629 {
630 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
631 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
632 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
633 }
634 }
635 break;
636 }
637
638 case PGMPOOLKIND_64BIT_PML4:
639 {
640 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
641 /*
642 * Hopefully this doesn't happen very often:
643 * - messing with the bits of pd pointers without changing the physical address
644 */
645 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
646 const unsigned iShw = off / sizeof(X86PDPE);
647 if (uShw.pPML4->a[iShw].n.u1Present)
648 {
649 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
650 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
651 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
652 }
653 /* paranoia / a bit assumptive. */
654 if ( (off & 7)
655 && (off & 7) + cbWrite > sizeof(X86PDPE))
656 {
657 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
658 if (uShw.pPML4->a[iShw2].n.u1Present)
659 {
660 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
661 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
662 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
663 }
664 }
665 break;
666 }
667#endif /* !IN_RC */
668
669 default:
670 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
671 }
672 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
673
674 /* next */
675 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
676 return;
677 pPage = &pPool->aPages[pPage->iMonitoredNext];
678 }
679}
680
681# ifndef IN_RING3
682
683/**
684 * Checks if an access could be a fork operation in progress.
685 *
686 * Meaning that the guest is setting up the parent process for Copy-On-Write.
687 *
688 * @returns true if it's likely that we're forking, otherwise false.
689 * @param pPool The pool.
690 * @param pDis The disassembled instruction.
691 * @param offFault The access offset.
692 */
693DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
694{
695 /*
696 * i386 linux is using btr to clear X86_PTE_RW.
697 * The functions involved are (2.6.16 source inspection):
698 * clear_bit
699 * ptep_set_wrprotect
700 * copy_one_pte
701 * copy_pte_range
702 * copy_pmd_range
703 * copy_pud_range
704 * copy_page_range
705 * dup_mmap
706 * dup_mm
707 * copy_mm
708 * copy_process
709 * do_fork
710 */
711 if ( pDis->pCurInstr->uOpcode == OP_BTR
712 && !(offFault & 4)
713 /** @todo Validate that the bit index is X86_PTE_RW. */
714 )
715 {
716 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
717 return true;
718 }
719 return false;
720}
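/* Background (illustrative): on fork, 2.6-era i386 Linux write-protects the parent's PTEs
   via ptep_set_wrprotect, i.e. a btr clearing X86_PTE_RW (bit 1).  That bit lives in the
   low dword of the (possibly PAE) entry, which is presumably why only faults with
   !(offFault & 4) are taken as a likely fork. */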
721
722
723/**
724 * Determine whether the page is likely to have been reused.
725 *
726 * @returns true if we consider the page as being reused for a different purpose.
727 * @returns false if we consider it to still be a paging page.
728 * @param pVM Pointer to the VM.
729 * @param pVCpu Pointer to the VMCPU.
730 * @param pRegFrame Trap register frame.
731 * @param pDis The disassembly info for the faulting instruction.
732 * @param pvFault The fault address.
733 *
734 * @remark The REP prefix check is left to the caller because of STOSD/W.
735 */
736DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
737{
738#ifndef IN_RC
739 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
740 if ( HMHasPendingIrq(pVM)
741 && (pRegFrame->rsp - pvFault) < 32)
742 {
743 /* Fault caused by stack writes while trying to inject an interrupt event. */
744 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
745 return true;
746 }
747#else
748 NOREF(pVM); NOREF(pvFault);
749#endif
750
751 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
752
753 /* Non-supervisor mode write means it's used for something else. */
754 if (CPUMGetGuestCPL(pVCpu) == 3)
755 return true;
756
757 switch (pDis->pCurInstr->uOpcode)
758 {
759 /* call implies the actual push of the return address faulted */
760 case OP_CALL:
761 Log4(("pgmPoolMonitorIsReused: CALL\n"));
762 return true;
763 case OP_PUSH:
764 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
765 return true;
766 case OP_PUSHF:
767 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
768 return true;
769 case OP_PUSHA:
770 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
771 return true;
772 case OP_FXSAVE:
773 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
774 return true;
775 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
776 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
777 return true;
778 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
779 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
780 return true;
781 case OP_MOVSWD:
782 case OP_STOSWD:
783 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
784 && pRegFrame->rcx >= 0x40
785 )
786 {
787 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
788
789 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
790 return true;
791 }
792 return false;
793 }
794 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
795 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
796 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
797 {
798 Log4(("pgmPoolMonitorIsReused: ESP\n"));
799 return true;
800 }
801
802 return false;
803}
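/* Summary of the heuristic above: CPL 3 writes, stack-related instructions (call/push/pushf),
   FXSAVE, non-temporal stores (Solaris page zeroing/copying), large 64-bit REP STOS and
   ESP-based operands all suggest the page now holds ordinary data rather than a page table,
   so keeping it monitored is not worthwhile. */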
804
805
806/**
807 * Flushes the page being accessed.
808 *
809 * @returns VBox status code suitable for scheduling.
810 * @param pVM Pointer to the VM.
811 * @param pVCpu Pointer to the VMCPU.
812 * @param pPool The pool.
813 * @param pPage The pool page (head).
814 * @param pDis The disassembly of the write instruction.
815 * @param pRegFrame The trap register frame.
816 * @param GCPhysFault The fault address as guest physical address.
817 * @param pvFault The fault address.
818 * @todo VBOXSTRICTRC
819 */
820static int pgmPoolAccessPfHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
821 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
822{
823 NOREF(pVM); NOREF(GCPhysFault);
824
825 /*
826 * First, do the flushing.
827 */
828 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
829
830 /*
831 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
832 * Must do this in raw mode (!); XP boot will fail otherwise.
833 */
834 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
835 if (rc2 == VINF_SUCCESS)
836 { /* do nothing */ }
837#ifdef VBOX_WITH_IEM
838 else if (rc2 == VINF_EM_RESCHEDULE)
839 {
840 if (rc == VINF_SUCCESS)
841 rc = VBOXSTRICTRC_VAL(rc2);
842# ifndef IN_RING3
843 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
844# endif
845 }
846#endif
847 else if (rc2 == VERR_EM_INTERPRETER)
848 {
849#ifdef IN_RC
850 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
851 {
852 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
853 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->eip));
854 rc = VINF_SUCCESS;
855 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
856 }
857 else
858#endif
859 {
860 rc = VINF_EM_RAW_EMULATE_INSTR;
861 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
862 }
863 }
864 else if (RT_FAILURE_NP(rc2))
865 rc = VBOXSTRICTRC_VAL(rc2);
866 else
867 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
868
869 LogFlow(("pgmPoolAccessPfHandlerPT: returns %Rrc (flushed)\n", rc));
870 return rc;
871}
872
873
874/**
875 * Handles the STOSD write accesses.
876 *
877 * @returns VBox status code suitable for scheduling.
878 * @param pVM Pointer to the VM.
879 * @param pPool The pool.
880 * @param pPage The pool page (head).
881 * @param pDis The disassembly of the write instruction.
882 * @param pRegFrame The trap register frame.
883 * @param GCPhysFault The fault address as guest physical address.
884 * @param pvFault The fault address.
885 */
886DECLINLINE(int) pgmPoolAccessPfHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
887 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
888{
889 unsigned uIncrement = pDis->Param1.cb;
890 NOREF(pVM);
891
892 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
893 Assert(pRegFrame->rcx <= 0x20);
894
895#ifdef VBOX_STRICT
896 if (pDis->uOpMode == DISCPUMODE_32BIT)
897 Assert(uIncrement == 4);
898 else
899 Assert(uIncrement == 8);
900#endif
901
902 Log3(("pgmPoolAccessPfHandlerSTOSD\n"));
903
904 /*
905 * Increment the modification counter and insert it into the list
906 * of modified pages the first time.
907 */
908 if (!pPage->cModifications++)
909 pgmPoolMonitorModifiedInsert(pPool, pPage);
910
911 /*
912 * Execute REP STOSD.
913 *
914 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
915 * write situation, meaning that it's safe to write here.
916 */
917 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
918 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
919 while (pRegFrame->rcx)
920 {
921#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
922 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
923 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
924 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
925#else
926 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
927#endif
928#ifdef IN_RC
929 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
930#else
931 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
932#endif
933 pu32 += uIncrement;
934 GCPhysFault += uIncrement;
935 pRegFrame->rdi += uIncrement;
936 pRegFrame->rcx--;
937 }
938 pRegFrame->rip += pDis->cbInstr;
939
940 LogFlow(("pgmPoolAccessPfHandlerSTOSD: returns\n"));
941 return VINF_SUCCESS;
942}
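/* The loop above emulates REP STOSD/STOSQ one element at a time: each iteration first lets
   pgmPoolMonitorChainChanging invalidate the affected shadow entries and then performs the
   guest store (directly in RC, via PGMPhysSimpleWriteGCPhys otherwise), advancing rdi/rcx
   as it goes; rip is finally stepped past the instruction so it is not re-executed. */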
943
944
945/**
946 * Handles the simple write accesses.
947 *
948 * @returns VBox status code suitable for scheduling.
949 * @param pVM Pointer to the VM.
950 * @param pVCpu Pointer to the VMCPU.
951 * @param pPool The pool.
952 * @param pPage The pool page (head).
953 * @param pDis The disassembly of the write instruction.
954 * @param pRegFrame The trap register frame.
955 * @param GCPhysFault The fault address as guest physical address.
956 * @param pvFault The fault address.
957 * @param pfReused Reused state (in/out)
958 */
959DECLINLINE(int) pgmPoolAccessPfHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
960 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
961{
962 Log3(("pgmPoolAccessPfHandlerSimple\n"));
963 NOREF(pVM);
964 NOREF(pfReused); /* initialized by caller */
965
966 /*
967 * Increment the modification counter and insert it into the list
968 * of modified pages the first time.
969 */
970 if (!pPage->cModifications++)
971 pgmPoolMonitorModifiedInsert(pPool, pPage);
972
973 /*
974 * Clear all the pages. ASSUMES that pvFault is readable.
975 */
976#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
977 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
978#endif
979
980 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
981 if (cbWrite <= 8)
982 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
983 else
984 {
985 Assert(cbWrite <= 16);
986 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
987 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
988 }
989
990#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
991 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
992#endif
993
994 /*
995 * Interpret the instruction.
996 */
997 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
998 if (RT_SUCCESS(rc))
999 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1000 else if (rc == VERR_EM_INTERPRETER)
1001 {
1002 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1003 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1004 rc = VINF_EM_RAW_EMULATE_INSTR;
1005 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1006 }
1007
1008#if 0 /* experimental code */
1009 if (rc == VINF_SUCCESS)
1010 {
1011 switch (pPage->enmKind)
1012 {
1013 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1014 {
1015 X86PTEPAE GstPte;
1016 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1017 AssertRC(rc);
1018
1019 /* Check the new value written by the guest. If present and with a bogus physical address, then
1020 * it's fairly safe to assume the guest is reusing the PT.
1021 */
1022 if (GstPte.n.u1Present)
1023 {
1024 RTHCPHYS HCPhys = -1;
1025 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1026 if (rc != VINF_SUCCESS)
1027 {
1028 *pfReused = true;
1029 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1030 }
1031 }
1032 break;
1033 }
1034 }
1035 }
1036#endif
1037
1038 LogFlow(("pgmPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1039 return VBOXSTRICTRC_VAL(rc);
1040}
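/* Order matters above: the shadow entries covering the (at most 16 byte) write are cleared
   first, and only then is the guest instruction interpreted, so the actual memory update
   happens after the corresponding shadow state has been invalidated. */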
1041
1042
1043/**
1044 * \#PF Handler callback for PT write accesses.
1045 *
1046 * @returns VBox status code (appropriate for GC return).
1047 * @param pVM Pointer to the VM.
1048 * @param pVCpu Pointer to the cross context CPU context for the
1049 * calling EMT.
1050 * @param uErrorCode CPU Error code.
1051 * @param pRegFrame Trap register frame.
1052 * NULL on DMA and other non-CPU access.
1053 * @param pvFault The fault address (cr2).
1054 * @param GCPhysFault The GC physical address corresponding to pvFault.
1055 * @param pvUser User argument.
1056 */
1057DECLEXPORT(VBOXSTRICTRC) pgmPoolAccessPfHandler(PVM pVM, PVMCPU pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
1058 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1059{
1060 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1061 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1062 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1063 unsigned cMaxModifications;
1064 bool fForcedFlush = false;
1065 NOREF(uErrorCode);
1066
1067 LogFlow(("pgmPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1068
1069 pgmLock(pVM);
1070 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1071 {
1072 /* Pool page changed while we were waiting for the lock; ignore. */
1073 Log(("CPU%d: pgmPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1074 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1075 pgmUnlock(pVM);
1076 return VINF_SUCCESS;
1077 }
1078#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1079 if (pPage->fDirty)
1080 {
1081 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1082 pgmUnlock(pVM);
1083 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1084 }
1085#endif
1086
1087#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1088 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1089 {
1090 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1091 void *pvGst;
1092 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1093 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1094 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1095 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1096 }
1097#endif
1098
1099 /*
1100 * Disassemble the faulting instruction.
1101 */
1102 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1103 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1104 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1105 {
1106 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1107 pgmUnlock(pVM);
1108 return rc;
1109 }
1110
1111 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1112
1113 /*
1114 * We should ALWAYS have the list head as user parameter. This
1115 * is because we use that page to record the changes.
1116 */
1117 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1118
1119#ifdef IN_RING0
1120 /* Maximum nr of modifications depends on the page type. */
1121 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1122 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1123 cMaxModifications = 4;
1124 else
1125 cMaxModifications = 24;
1126#else
1127 cMaxModifications = 48;
1128#endif
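    /* The limits are context dependent: in ring-0 every monitored write is a VM exit, so page
       tables (4) and other structures (24) get flushed early; in the raw-mode case (#else)
       traps are comparatively cheap and up to 48 modifications are tolerated. */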
1129
1130 /*
1131 * Incremental page table updates should weigh more than random ones.
1132 * (Only applies when started from offset 0)
1133 */
1134 pVCpu->pgm.s.cPoolAccessHandler++;
1135 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1136 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1137 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1138 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1139 {
1140 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1141 Assert(pPage->cModifications < 32000);
1142 pPage->cModifications = pPage->cModifications * 2;
1143 pPage->GCPtrLastAccessHandlerFault = pvFault;
1144 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1145 if (pPage->cModifications >= cMaxModifications)
1146 {
1147 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1148 fForcedFlush = true;
1149 }
1150 }
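    /* Heuristic: a fault from (nearly) the same RIP, exactly one operand past the previous
       faulting address, looks like a loop (re)initializing the whole table, so the
       modification count is doubled to reach the flush threshold quickly. */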
1151
1152 if (pPage->cModifications >= cMaxModifications)
1153 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1154
1155 /*
1156 * Check if it's worth dealing with.
1157 */
1158 bool fReused = false;
1159 bool fNotReusedNotForking = false;
1160 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1161 || pgmPoolIsPageLocked(pPage)
1162 )
1163 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1164 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1165 {
1166 /*
1167 * Simple instructions, no REP prefix.
1168 */
1169 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1170 {
1171 rc = pgmPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1172 if (fReused)
1173 goto flushPage;
1174
1175 /* A mov instruction to change the first page table entry will be remembered so we can detect
1176 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1177 */
1178 if ( rc == VINF_SUCCESS
1179 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1180 && pDis->pCurInstr->uOpcode == OP_MOV
1181 && (pvFault & PAGE_OFFSET_MASK) == 0)
1182 {
1183 pPage->GCPtrLastAccessHandlerFault = pvFault;
1184 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1185 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1186 /* Make sure we don't kick out a page too quickly. */
1187 if (pPage->cModifications > 8)
1188 pPage->cModifications = 2;
1189 }
1190 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1191 {
1192 /* ignore the 2nd write to this page table entry. */
1193 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1194 }
1195 else
1196 {
1197 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1198 pPage->GCPtrLastAccessHandlerRip = 0;
1199 }
1200
1201 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1202 pgmUnlock(pVM);
1203 return rc;
1204 }
1205
1206 /*
1207 * Windows is frequently doing small memset() operations (netio test 4k+).
1208 * We have to deal with these or we'll kill the cache and performance.
1209 */
1210 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1211 && !pRegFrame->eflags.Bits.u1DF
1212 && pDis->uOpMode == pDis->uCpuMode
1213 && pDis->uAddrMode == pDis->uCpuMode)
1214 {
1215 bool fValidStosd = false;
1216
1217 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1218 && pDis->fPrefix == DISPREFIX_REP
1219 && pRegFrame->ecx <= 0x20
1220 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1221 && !((uintptr_t)pvFault & 3)
1222 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1223 )
1224 {
1225 fValidStosd = true;
1226 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1227 }
1228 else
1229 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1230 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1231 && pRegFrame->rcx <= 0x20
1232 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1233 && !((uintptr_t)pvFault & 7)
1234 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1235 )
1236 {
1237 fValidStosd = true;
1238 }
1239
1240 if (fValidStosd)
1241 {
1242 rc = pgmPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1243 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1244 pgmUnlock(pVM);
1245 return rc;
1246 }
1247 }
1248
1249 /* REP prefix, don't bother. */
1250 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1251 Log4(("pgmPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1252 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1253 fNotReusedNotForking = true;
1254 }
1255
1256#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1257 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1258 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1259 */
1260 if ( pPage->cModifications >= cMaxModifications
1261 && !fForcedFlush
1262 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1263 && ( fNotReusedNotForking
1264 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1265 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1266 )
1267 )
1268 {
1269 Assert(!pgmPoolIsPageLocked(pPage));
1270 Assert(pPage->fDirty == false);
1271
1272 /* Flush any monitored duplicates as we will disable write protection. */
1273 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1274 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1275 {
1276 PPGMPOOLPAGE pPageHead = pPage;
1277
1278 /* Find the monitor head. */
1279 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1280 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1281
1282 while (pPageHead)
1283 {
1284 unsigned idxNext = pPageHead->iMonitoredNext;
1285
1286 if (pPageHead != pPage)
1287 {
1288 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1289 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1290 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1291 AssertRC(rc2);
1292 }
1293
1294 if (idxNext == NIL_PGMPOOL_IDX)
1295 break;
1296
1297 pPageHead = &pPool->aPages[idxNext];
1298 }
1299 }
1300
1301 /* The flushing above might fail for locked pages, so double check. */
1302 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1303 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1304 {
1305 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1306
1307 /* Temporarily allow write access to the page table again. */
1308 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1309 if (rc == VINF_SUCCESS)
1310 {
1311 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1312 AssertMsg(rc == VINF_SUCCESS
1313 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1314 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1315 || rc == VERR_PAGE_NOT_PRESENT,
1316 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1317# ifdef VBOX_STRICT
1318 pPage->GCPtrDirtyFault = pvFault;
1319# endif
1320
1321 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1322 pgmUnlock(pVM);
1323 return rc;
1324 }
1325 }
1326 }
1327#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1328
1329 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1330flushPage:
1331 /*
1332 * Not worth it, so flush it.
1333 *
1334 * If we considered it to be reused, don't go back to ring-3
1335 * to emulate failed instructions since we usually cannot
1336 * interpret them. This may be a bit risky, in which case
1337 * the reuse detection must be fixed.
1338 */
1339 rc = pgmPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1340 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1341 && fReused)
1342 {
1343 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1344 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1345 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1346 }
1347 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1348 pgmUnlock(pVM);
1349 return rc;
1350}
1351
1352# endif /* !IN_RING3 */
1353
1354/**
1355 * Access handler callback for PT write accesses.
1356 *
1357 * The handler cannot raise any faults; it's mainly for monitoring write access
1358 * to certain pages.
1359 *
1360 * @returns VINF_SUCCESS if the handler has carried out the operation.
1361 * @returns VINF_PGM_HANDLER_DO_DEFAULT if the caller should carry out the access operation.
1362 * @param pVM Pointer to the VM.
1363 * @param pVCpu The cross context CPU structure for the calling EMT.
1364 * @param GCPhys The physical address the guest is writing to.
1365 * @param pvPhys The HC mapping of that address.
1366 * @param pvBuf What the guest is reading/writing.
1367 * @param cbBuf How much it's reading/writing.
1368 * @param enmAccessType The access type.
1369 * @param enmOrigin Who is making the access.
1370 * @param pvUser User argument.
1371 */
1372PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1373pgmPoolAccessHandler(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1374 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1375{
1376 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1377 STAM_PROFILE_START(&pPool->StatMonitorR3, a);
1378 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1379 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1380 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1381
1382 NOREF(pvBuf); NOREF(enmAccessType);
1383
1384 /*
1385 * Make sure the pool page wasn't modified by a different CPU.
1386 */
1387 pgmLock(pVM);
1388 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1389 {
1390 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1391
1392 /* The max modification count before flushing depends on the context and page type. */
1393#ifdef IN_RING3
1394 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1395#else
1396 uint16_t cMaxModifications;
1397 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1398 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1399 cMaxModifications = 4;
1400 else
1401 cMaxModifications = 24;
1402# ifdef IN_RC
1403 cMaxModifications *= 2; /* traps are cheaper than exits. */
1404# endif
1405#endif
1406
1407 /*
1408 * We don't have to be very sophisticated about this since there are relatively few calls here.
1409 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1410 */
1411 if ( ( pPage->cModifications < cMaxModifications
1412 || pgmPoolIsPageLocked(pPage) )
1413 && enmOrigin != PGMACCESSORIGIN_DEVICE
1414 && cbBuf <= 16)
1415 {
1416 /* Clear the shadow entry. */
1417 if (!pPage->cModifications++)
1418 pgmPoolMonitorModifiedInsert(pPool, pPage);
1419
1420 if (cbBuf <= 8)
1421 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1422 else
1423 {
1424 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1425 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1426 }
1427 }
1428 else
1429 {
1430 /* ASSUME that VERR_PGM_POOL_CLEARED can be ignored here and that FFs will deal with it in due time. */
1431 pgmPoolMonitorChainFlush(pPool, pPage);
1432 }
1433
1434 STAM_PROFILE_STOP_EX(&pPool->StatMonitorR3, &pPool->StatMonitorR3FlushPage, a);
1435 }
1436 else
1437 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1438 pgmUnlock(pVM);
1439 return VINF_PGM_HANDLER_DO_DEFAULT;
1440}
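/* Note that VINF_PGM_HANDLER_DO_DEFAULT is always returned: this handler only invalidates
   (or flushes) the affected shadow entries and leaves the actual guest write to the caller. */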
1441
1442
1443# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1444
1445# if defined(VBOX_STRICT) && !defined(IN_RING3)
1446
1447/**
1448 * Check references to guest physical memory in a PAE / PAE page table.
1449 *
1450 * @param pPool The pool.
1451 * @param pPage The page.
1452 * @param pShwPT The shadow page table (mapping of the page).
1453 * @param pGstPT The guest page table.
1454 */
1455static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1456{
1457 unsigned cErrors = 0;
1458 int LastRc = -1; /* initialized to shut up gcc */
1459 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1460 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1461 PVM pVM = pPool->CTX_SUFF(pVM);
1462
1463#ifdef VBOX_STRICT
1464 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1465 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1466#endif
1467 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1468 {
1469 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1470 {
1471 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1472 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1473 if ( rc != VINF_SUCCESS
1474 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1475 {
1476 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1477 LastPTE = i;
1478 LastRc = rc;
1479 LastHCPhys = HCPhys;
1480 cErrors++;
1481
1482 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1483 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1484 AssertRC(rc);
1485
1486 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1487 {
1488 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1489
1490 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1491 {
1492 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1493
1494 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1495 {
1496 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1497 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1498 {
1499 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1500 }
1501 }
1502
1503 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1504 }
1505 }
1506 }
1507 }
1508 }
1509 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1510}
1511
1512
1513/**
1514 * Check references to guest physical memory in a PAE / 32-bit page table.
1515 *
1516 * @param pPool The pool.
1517 * @param pPage The page.
1518 * @param pShwPT The shadow page table (mapping of the page).
1519 * @param pGstPT The guest page table.
1520 */
1521static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1522{
1523 unsigned cErrors = 0;
1524 int LastRc = -1; /* initialized to shut up gcc */
1525 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1526 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1527 PVM pVM = pPool->CTX_SUFF(pVM);
1528
1529#ifdef VBOX_STRICT
1530 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1531 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1532#endif
1533 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1534 {
1535 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1536 {
1537 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1538 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1539 if ( rc != VINF_SUCCESS
1540 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1541 {
1542 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1543 LastPTE = i;
1544 LastRc = rc;
1545 LastHCPhys = HCPhys;
1546 cErrors++;
1547
1548 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1549 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1550 AssertRC(rc);
1551
1552 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1553 {
1554 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1555
1556 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1557 {
1558 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1559
1560 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1561 {
1562 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1563 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1564 {
1565 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1566 }
1567 }
1568
1569 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1570 }
1571 }
1572 }
1573 }
1574 }
1575 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1576}
1577
1578# endif /* VBOX_STRICT && !IN_RING3 */
1579
1580/**
1581 * Clear references to guest physical memory in a PAE / PAE page table.
1582 *
1583 * @returns Number of changed PTEs.
1584 * @param pPool The pool.
1585 * @param pPage The page.
1586 * @param pShwPT The shadow page table (mapping of the page).
1587 * @param pGstPT The guest page table.
1588 * @param pOldGstPT The old cached guest page table.
1589 * @param fAllowRemoval Whether to bail out and flush as soon as an invalid (reused) PTE is encountered.
1590 * @param pfFlush Where to return whether the reused page table must be flushed (out).
1591 */
1592DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1593 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1594{
1595 unsigned cChanged = 0;
1596
1597#ifdef VBOX_STRICT
1598 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1599 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1600#endif
1601 *pfFlush = false;
1602
1603 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1604 {
1605 /* Check the new value written by the guest. If present and with a bogus physical address, then
1606 * it's fairly safe to assume the guest is reusing the PT.
1607 */
1608 if ( fAllowRemoval
1609 && pGstPT->a[i].n.u1Present)
1610 {
1611 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1612 {
1613 *pfFlush = true;
1614 return ++cChanged;
1615 }
1616 }
1617 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1618 {
1619 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1620 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1621 {
1622#ifdef VBOX_STRICT
1623 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1624 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1625 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1626#endif
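/* Attribute-only change: the shadow PTE may legitimately have fewer write rights than the guest PTE (write monitoring, dirty-bit tracking), hence the fHostRW <= fGuestRW test below instead of strict equality. */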
1627 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1628 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1629 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1630 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1631
1632 if ( uHostAttr == uGuestAttr
1633 && fHostRW <= fGuestRW)
1634 continue;
1635 }
1636 cChanged++;
1637 /* Something was changed, so flush it. */
1638 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1639 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1640 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1641 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1642 }
1643 }
1644 return cChanged;
1645}
1646
1647
1648/**
1649 * Clear references to guest physical memory in a PAE / 32-bit page table.
1650 *
1651 * @returns Number of changed PTEs.
1652 * @param pPool The pool.
1653 * @param pPage The page.
1654 * @param pShwPT The shadow page table (mapping of the page).
1655 * @param pGstPT The guest page table.
1656 * @param pOldGstPT The old cached guest page table.
1657 * @param fAllowRemoval Whether to bail out and flush as soon as an invalid (reused) PTE is encountered.
1658 * @param pfFlush Where to return whether the reused page table must be flushed (out).
1659 */
1660DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1661 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1662{
1663 unsigned cChanged = 0;
1664
1665#ifdef VBOX_STRICT
1666 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1667 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1668#endif
1669 *pfFlush = false;
1670
1671 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1672 {
1673 /* Check the new value written by the guest. If present and with a bogus physical address, then
1674 * it's fairly safe to assume the guest is reusing the PT.
1675 */
1676 if ( fAllowRemoval
1677 && pGstPT->a[i].n.u1Present)
1678 {
1679 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1680 {
1681 *pfFlush = true;
1682 return ++cChanged;
1683 }
1684 }
1685 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1686 {
1687 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1688 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1689 {
1690#ifdef VBOX_STRICT
1691 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1692 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1693 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1694#endif
1695 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1696 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1697 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1698 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1699
1700 if ( uHostAttr == uGuestAttr
1701 && fHostRW <= fGuestRW)
1702 continue;
1703 }
1704 cChanged++;
1705 /* Something was changed, so flush it. */
1706 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1707 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1708 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1709 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1710 }
1711 }
1712 return cChanged;
1713}
1714
1715
1716/**
1717 * Flushes a dirty page.
1718 *
1719 * @param pVM Pointer to the VM.
1720 * @param pPool The pool.
1721 * @param idxSlot Dirty array slot index
1722 * @param fAllowRemoval Allow a reused page table to be removed
1723 */
1724static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1725{
1726 PPGMPOOLPAGE pPage;
1727 unsigned idxPage;
1728
1729 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1730 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1731 return;
1732
1733 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1734 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1735 pPage = &pPool->aPages[idxPage];
1736 Assert(pPage->idx == idxPage);
1737 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1738
1739 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1740 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1741
1742#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1743 PVMCPU pVCpu = VMMGetCpu(pVM);
1744 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1745#endif
1746
1747 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1748 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1749 Assert(rc == VINF_SUCCESS);
1750 pPage->fDirty = false;
1751
1752#ifdef VBOX_STRICT
1753 uint64_t fFlags = 0;
1754 RTHCPHYS HCPhys;
1755 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1756 AssertMsg( ( rc == VINF_SUCCESS
1757 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1758 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1759 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1760 || rc == VERR_PAGE_NOT_PRESENT,
1761 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1762#endif
1763
1764 /* Flush those PTEs that have changed. */
1765 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1766 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1767 void *pvGst;
1768 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1769 bool fFlush;
1770 unsigned cChanges;
1771
1772 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1773 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1774 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1775 else
1776 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1777 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1778
1779 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1780 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1781 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1782 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1783
1784 /* This page is likely to be modified again, so reduce the number of modifications just a bit here. */
1785 Assert(pPage->cModifications);
1786 if (cChanges < 4)
1787 pPage->cModifications = 1; /* must use > 0 here */
1788 else
1789 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1790
1791 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
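/* If the dirty array was completely full, the slot we have just flushed becomes the next free one. */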
1792 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1793 pPool->idxFreeDirtyPage = idxSlot;
1794
1795 pPool->cDirtyPages--;
1796 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1797 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1798 if (fFlush)
1799 {
1800 Assert(fAllowRemoval);
1801 Log(("Flush reused page table!\n"));
1802 pgmPoolFlushPage(pPool, pPage);
1803 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1804 }
1805 else
1806 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1807
1808#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1809 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1810#endif
1811}
1812
1813
1814# ifndef IN_RING3
1815/**
1816 * Adds a new dirty page.
1817 *
1818 * @param pVM Pointer to the VM.
1819 * @param pPool The pool.
1820 * @param pPage The page.
1821 */
1822void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1823{
1824 unsigned idxFree;
1825
1826 PGM_LOCK_ASSERT_OWNER(pVM);
1827 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1828 Assert(!pPage->fDirty);
1829
1830 idxFree = pPool->idxFreeDirtyPage;
1831 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1832 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1833
1834 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1835 {
1836 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1837 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables */);
1838 }
1839 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1840 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1841
1842 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1843
1844 /*
1845 * Make a copy of the guest page table as we require valid GCPhys addresses
1846 * when removing references to physical pages.
1847 * (The HCPhys linear lookup is *extremely* expensive!)
1848 */
1849 void *pvGst;
1850 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
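/* A PAE shadow of a 32-bit guest PT only covers 2 MB, i.e. 512 of the guest's 4-byte entries, so half a page suffices in that case. */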
1851 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1852# ifdef VBOX_STRICT
1853 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1854 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1855 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1856 else
1857 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1858 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1859# endif
1860 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1861
1862 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1863 pPage->fDirty = true;
1864 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1865 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1866 pPool->cDirtyPages++;
1867
1868 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
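/* Advance the free index round-robin; if that slot is still occupied, scan for any free one (one must exist while cDirtyPages < the array size). */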
1869 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1870 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1871 {
1872 unsigned i;
1873 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1874 {
1875 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1876 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1877 {
1878 pPool->idxFreeDirtyPage = idxFree;
1879 break;
1880 }
1881 }
1882 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1883 }
1884
1885 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1886
1887 /*
1888 * Clear all references to this shadow table. See @bugref{7298}.
1889 */
1890 pgmPoolTrackClearPageUsers(pPool, pPage);
1891}
1892# endif /* !IN_RING3 */
1893
1894
1895/**
1896 * Checks if the specified page is dirty (not write monitored).
1897 *
1898 * @returns true if dirty, false if not.
1899 * @param pVM Pointer to the VM.
1900 * @param GCPhys Guest physical address
1901 */
1902bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1903{
1904 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1905 PGM_LOCK_ASSERT_OWNER(pVM);
1906 if (!pPool->cDirtyPages)
1907 return false;
1908
1909 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1910
1911 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1912 {
1913 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1914 {
1915 PPGMPOOLPAGE pPage;
1916 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1917
1918 pPage = &pPool->aPages[idxPage];
1919 if (pPage->GCPhys == GCPhys)
1920 return true;
1921 }
1922 }
1923 return false;
1924}
1925
1926
1927/**
1928 * Reset all dirty pages by reinstating page monitoring.
1929 *
1930 * @param pVM Pointer to the VM.
1931 */
1932void pgmPoolResetDirtyPages(PVM pVM)
1933{
1934 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1935 PGM_LOCK_ASSERT_OWNER(pVM);
1936 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1937
1938 if (!pPool->cDirtyPages)
1939 return;
1940
1941 Log(("pgmPoolResetDirtyPages\n"));
1942 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1943 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables */);
1944
1945 pPool->idxFreeDirtyPage = 0;
1946 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1947 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1948 {
1949 unsigned i;
1950 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1951 {
1952 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1953 {
1954 pPool->idxFreeDirtyPage = i;
1955 break;
1956 }
1957 }
1958 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1959 }
1960
1961 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1962 return;
1963}
1964
1965
1966/**
1967 * Invalidate the PT entry for the specified page
1968 *
1969 * @param pVM Pointer to the VM.
1970 * @param GCPtrPage Guest page to invalidate
1971 */
1972void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1973{
1974 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1975 PGM_LOCK_ASSERT_OWNER(pVM);
1976 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1977
1978 if (!pPool->cDirtyPages)
1979 return;
1980
1981 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
1982 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1983 {
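/* Note: currently a no-op in this revision; no per-entry invalidation is performed here. */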
1984 }
1985}
1986
1987
1988/**
1989 * Flushes the dirty state of the specified page table, reinstating its write monitoring.
1990 *
1991 * @param pVM Pointer to the VM.
1992 * @param GCPhysPT Physical address of the page table
1993 */
1994void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1995{
1996 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1997 PGM_LOCK_ASSERT_OWNER(pVM);
1998 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1999 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2000
2001 if (!pPool->cDirtyPages)
2002 return;
2003
2004 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2005
2006 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2007 {
2008 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
2009 {
2010 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
2011
2012 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2013 if (pPage->GCPhys == GCPhysPT)
2014 {
2015 idxDirtyPage = i;
2016 break;
2017 }
2018 }
2019 }
2020
2021 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2022 {
2023 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables */);
2024 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2025 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
2026 {
2027 unsigned i;
2028 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2029 {
2030 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2031 {
2032 pPool->idxFreeDirtyPage = i;
2033 break;
2034 }
2035 }
2036 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2037 }
2038 }
2039}
2040
2041# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2042
2043/**
2044 * Inserts a page into the GCPhys hash table.
2045 *
2046 * @param pPool The pool.
2047 * @param pPage The page.
2048 */
2049DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2050{
2051 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2052 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2053 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2054 pPage->iNext = pPool->aiHash[iHash];
2055 pPool->aiHash[iHash] = pPage->idx;
2056}
2057
2058
2059/**
2060 * Removes a page from the GCPhys hash table.
2061 *
2062 * @param pPool The pool.
2063 * @param pPage The page.
2064 */
2065DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2066{
2067 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2068 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2069 if (pPool->aiHash[iHash] == pPage->idx)
2070 pPool->aiHash[iHash] = pPage->iNext;
2071 else
2072 {
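/* Not the bucket head: walk the singly linked hash chain to find the predecessor and unlink the page. */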
2073 uint16_t iPrev = pPool->aiHash[iHash];
2074 for (;;)
2075 {
2076 const int16_t i = pPool->aPages[iPrev].iNext;
2077 if (i == pPage->idx)
2078 {
2079 pPool->aPages[iPrev].iNext = pPage->iNext;
2080 break;
2081 }
2082 if (i == NIL_PGMPOOL_IDX)
2083 {
2084 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2085 break;
2086 }
2087 iPrev = i;
2088 }
2089 }
2090 pPage->iNext = NIL_PGMPOOL_IDX;
2091}
2092
2093
2094/**
2095 * Frees up one cache page.
2096 *
2097 * @returns VBox status code.
2098 * @retval VINF_SUCCESS on success.
2099 * @param pPool The pool.
2100 * @param iUser The user index.
2101 */
2102static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2103{
2104#ifndef IN_RC
2105 const PVM pVM = pPool->CTX_SUFF(pVM);
2106#endif
2107 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
2108 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2109
2110 /*
2111 * Select one page from the tail of the age list.
2112 */
2113 PPGMPOOLPAGE pPage;
2114 for (unsigned iLoop = 0; ; iLoop++)
2115 {
2116 uint16_t iToFree = pPool->iAgeTail;
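/* Never evict the page we are allocating on behalf of (iUser); fall back to its predecessor in the age list. */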
2117 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2118 iToFree = pPool->aPages[iToFree].iAgePrev;
2119/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2120 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2121 {
2122 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2123 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2124 {
2125 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2126 continue;
2127 iToFree = i;
2128 break;
2129 }
2130 }
2131*/
2132 Assert(iToFree != iUser);
2133 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2134 pPage = &pPool->aPages[iToFree];
2135
2136 /*
2137 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2138 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2139 */
2140 if ( !pgmPoolIsPageLocked(pPage)
2141 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2142 break;
2143 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2144 pgmPoolCacheUsed(pPool, pPage);
2145 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2146 }
2147
2148 /*
2149 * Found a usable page, flush it and return.
2150 */
2151 int rc = pgmPoolFlushPage(pPool, pPage);
2152 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2153 /** @todo Find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2154 if (rc == VINF_SUCCESS)
2155 PGM_INVL_ALL_VCPU_TLBS(pVM);
2156 return rc;
2157}
2158
2159
2160/**
2161 * Checks if a kind mismatch is really a page being reused
2162 * or if it's just normal remappings.
2163 *
2164 * @returns true if reused and the cached page (enmKind1) should be flushed
2165 * @returns false if not reused.
2166 * @param enmKind1 The kind of the cached page.
2167 * @param enmKind2 The kind of the requested page.
2168 */
2169static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2170{
2171 switch (enmKind1)
2172 {
2173 /*
2174 * Never reuse them. There is no remapping in non-paging mode.
2175 */
2176 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2177 case PGMPOOLKIND_32BIT_PD_PHYS:
2178 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2179 case PGMPOOLKIND_PAE_PD_PHYS:
2180 case PGMPOOLKIND_PAE_PDPT_PHYS:
2181 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2182 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2183 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2184 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2185 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2186 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2187 return false;
2188
2189 /*
2190 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2191 */
2192 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2193 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2194 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2195 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2196 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2197 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2198 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2199 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2200 case PGMPOOLKIND_32BIT_PD:
2201 case PGMPOOLKIND_PAE_PDPT:
2202 switch (enmKind2)
2203 {
2204 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2205 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2206 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2207 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2208 case PGMPOOLKIND_64BIT_PML4:
2209 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2210 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2211 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2212 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2213 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2214 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2215 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2216 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2217 return true;
2218 default:
2219 return false;
2220 }
2221
2222 /*
2223 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2224 */
2225 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2226 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2227 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2228 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2229 case PGMPOOLKIND_64BIT_PML4:
2230 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2231 switch (enmKind2)
2232 {
2233 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2234 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2235 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2236 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2237 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2238 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2239 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2240 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2241 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2242 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2243 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2244 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2245 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2246 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2247 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2248 return true;
2249 default:
2250 return false;
2251 }
2252
2253 /*
2254 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2255 */
2256 case PGMPOOLKIND_ROOT_NESTED:
2257 return false;
2258
2259 default:
2260 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2261 }
2262}
2263
2264
2265/**
2266 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2267 *
2268 * @returns VBox status code.
2269 * @retval VINF_PGM_CACHED_PAGE on success.
2270 * @retval VERR_FILE_NOT_FOUND if not found.
2271 * @param pPool The pool.
2272 * @param GCPhys The GC physical address of the page we're going to shadow.
2273 * @param enmKind The kind of mapping.
2274 * @param enmAccess Access type for the mapping (only relevant for big pages)
2275 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2276 * @param iUser The shadow page pool index of the user table. This is
2277 * NIL_PGMPOOL_IDX for root pages.
2278 * @param iUserTable The index into the user table (shadowed). Ignored if
2279 * root page
2280 * @param ppPage Where to store the pointer to the page.
2281 */
2282static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2283 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2284{
2285 /*
2286 * Look up the GCPhys in the hash.
2287 */
2288 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2289 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2290 if (i != NIL_PGMPOOL_IDX)
2291 {
2292 do
2293 {
2294 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2295 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2296 if (pPage->GCPhys == GCPhys)
2297 {
2298 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2299 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2300 && pPage->fA20Enabled == fA20Enabled)
2301 {
2302 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2303 * doesn't flush it in case there are no more free use records.
2304 */
2305 pgmPoolCacheUsed(pPool, pPage);
2306
2307 int rc = VINF_SUCCESS;
2308 if (iUser != NIL_PGMPOOL_IDX)
2309 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2310 if (RT_SUCCESS(rc))
2311 {
2312 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2313 *ppPage = pPage;
2314 if (pPage->cModifications)
2315 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2316 STAM_COUNTER_INC(&pPool->StatCacheHits);
2317 return VINF_PGM_CACHED_PAGE;
2318 }
2319 return rc;
2320 }
2321
2322 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2323 {
2324 /*
2325 * The kind is different. In some cases we should now flush the page
2326 * as it has been reused, but in most cases this is normal remapping
2327 * of PDs as PT or big pages using the GCPhys field in a slightly
2328 * different way than the other kinds.
2329 */
2330 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2331 {
2332 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2333 pgmPoolFlushPage(pPool, pPage);
2334 break;
2335 }
2336 }
2337 }
2338
2339 /* next */
2340 i = pPage->iNext;
2341 } while (i != NIL_PGMPOOL_IDX);
2342 }
2343
2344 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2345 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2346 return VERR_FILE_NOT_FOUND;
2347}
2348
2349
2350/**
2351 * Inserts a page into the cache.
2352 *
2353 * @param pPool The pool.
2354 * @param pPage The cached page.
2355 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2356 */
2357static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2358{
2359 /*
2360 * Insert into the GCPhys hash if the page is fit for that.
2361 */
2362 Assert(!pPage->fCached);
2363 if (fCanBeCached)
2364 {
2365 pPage->fCached = true;
2366 pgmPoolHashInsert(pPool, pPage);
2367 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2368 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2369 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2370 }
2371 else
2372 {
2373 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2374 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2375 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2376 }
2377
2378 /*
2379 * Insert at the head of the age list.
2380 */
2381 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2382 pPage->iAgeNext = pPool->iAgeHead;
2383 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2384 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2385 else
2386 pPool->iAgeTail = pPage->idx;
2387 pPool->iAgeHead = pPage->idx;
2388}
2389
2390
2391/**
2392 * Flushes a cached page.
2393 *
2394 * @param pPool The pool.
2395 * @param pPage The cached page.
2396 */
2397static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2398{
2399 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2400
2401 /*
2402 * Remove the page from the hash.
2403 */
2404 if (pPage->fCached)
2405 {
2406 pPage->fCached = false;
2407 pgmPoolHashRemove(pPool, pPage);
2408 }
2409 else
2410 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2411
2412 /*
2413 * Remove it from the age list.
2414 */
2415 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2416 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2417 else
2418 pPool->iAgeTail = pPage->iAgePrev;
2419 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2420 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2421 else
2422 pPool->iAgeHead = pPage->iAgeNext;
2423 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2424 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2425}
2426
2427
2428/**
2429 * Looks for pages sharing the monitor.
2430 *
2431 * @returns Pointer to the head page.
2432 * @returns NULL if not found.
2433 * @param pPool The Pool
2434 * @param pNewPage The page which is going to be monitored.
2435 */
2436static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2437{
2438 /*
2439 * Look up the GCPhys in the hash.
2440 */
2441 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2442 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2443 if (i == NIL_PGMPOOL_IDX)
2444 return NULL;
2445 do
2446 {
2447 PPGMPOOLPAGE pPage = &pPool->aPages[i];
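/* Unsigned compare: matches any pool page whose GCPhys lies within the same guest page, including kinds that keep a sub-page offset in GCPhys. */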
2448 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2449 && pPage != pNewPage)
2450 {
2451 switch (pPage->enmKind)
2452 {
2453 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2454 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2455 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2456 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2457 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2458 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2459 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2460 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2461 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2462 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2463 case PGMPOOLKIND_64BIT_PML4:
2464 case PGMPOOLKIND_32BIT_PD:
2465 case PGMPOOLKIND_PAE_PDPT:
2466 {
2467 /* find the head */
2468 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2469 {
2470 Assert(pPage->iMonitoredPrev != pPage->idx);
2471 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2472 }
2473 return pPage;
2474 }
2475
2476 /* ignore, no monitoring. */
2477 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2478 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2479 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2480 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2481 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2482 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2483 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2484 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2485 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2486 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2487 case PGMPOOLKIND_ROOT_NESTED:
2488 case PGMPOOLKIND_PAE_PD_PHYS:
2489 case PGMPOOLKIND_PAE_PDPT_PHYS:
2490 case PGMPOOLKIND_32BIT_PD_PHYS:
2491 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2492 break;
2493 default:
2494 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2495 }
2496 }
2497
2498 /* next */
2499 i = pPage->iNext;
2500 } while (i != NIL_PGMPOOL_IDX);
2501 return NULL;
2502}
2503
2504
2505/**
2506 * Enables write monitoring of a guest page.
2507 *
2508 * @returns VBox status code.
2509 * @retval VINF_SUCCESS on success.
2510 * @param pPool The pool.
2511 * @param pPage The cached page.
2512 */
2513static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2514{
2515 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2516
2517 /*
2518 * Filter out the relevant kinds.
2519 */
2520 switch (pPage->enmKind)
2521 {
2522 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2523 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2524 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2525 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2526 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2527 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2528 case PGMPOOLKIND_64BIT_PML4:
2529 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2530 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2531 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2532 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2533 case PGMPOOLKIND_32BIT_PD:
2534 case PGMPOOLKIND_PAE_PDPT:
2535 break;
2536
2537 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2538 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2539 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2540 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2541 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2542 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2543 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2544 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2545 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2546 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2547 case PGMPOOLKIND_ROOT_NESTED:
2548 /* Nothing to monitor here. */
2549 return VINF_SUCCESS;
2550
2551 case PGMPOOLKIND_32BIT_PD_PHYS:
2552 case PGMPOOLKIND_PAE_PDPT_PHYS:
2553 case PGMPOOLKIND_PAE_PD_PHYS:
2554 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2555 /* Nothing to monitor here. */
2556 return VINF_SUCCESS;
2557 default:
2558 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2559 }
2560
2561 /*
2562 * Install handler.
2563 */
2564 int rc;
2565 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2566 if (pPageHead)
2567 {
2568 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2569 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2570
2571#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2572 if (pPageHead->fDirty)
2573 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2574#endif
2575
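/* A physical handler is already installed for this guest page; just link ourselves into the monitored chain right after the head. */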
2576 pPage->iMonitoredPrev = pPageHead->idx;
2577 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2578 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2579 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2580 pPageHead->iMonitoredNext = pPage->idx;
2581 rc = VINF_SUCCESS;
2582 }
2583 else
2584 {
2585 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2586 PVM pVM = pPool->CTX_SUFF(pVM);
2587 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2588 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2589 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2590 NIL_RTR3PTR /*pszDesc*/);
2591 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2592 * the heap size should suffice. */
2593 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2594 PVMCPU pVCpu = VMMGetCpu(pVM);
2595 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2596 }
2597 pPage->fMonitored = true;
2598 return rc;
2599}
2600
2601
2602/**
2603 * Disables write monitoring of a guest page.
2604 *
2605 * @returns VBox status code.
2606 * @retval VINF_SUCCESS on success.
2607 * @param pPool The pool.
2608 * @param pPage The cached page.
2609 */
2610static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2611{
2612 /*
2613 * Filter out the relevant kinds.
2614 */
2615 switch (pPage->enmKind)
2616 {
2617 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2618 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2619 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2620 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2621 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2622 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2623 case PGMPOOLKIND_64BIT_PML4:
2624 case PGMPOOLKIND_32BIT_PD:
2625 case PGMPOOLKIND_PAE_PDPT:
2626 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2627 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2628 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2629 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2630 break;
2631
2632 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2633 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2634 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2635 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2636 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2637 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2638 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2639 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2640 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2641 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2642 case PGMPOOLKIND_ROOT_NESTED:
2643 case PGMPOOLKIND_PAE_PD_PHYS:
2644 case PGMPOOLKIND_PAE_PDPT_PHYS:
2645 case PGMPOOLKIND_32BIT_PD_PHYS:
2646 /* Nothing to monitor here. */
2647 Assert(!pPage->fMonitored);
2648 return VINF_SUCCESS;
2649
2650 default:
2651 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2652 }
2653 Assert(pPage->fMonitored);
2654
2655 /*
2656 * Remove the page from the monitored list or uninstall it if last.
2657 */
2658 const PVM pVM = pPool->CTX_SUFF(pVM);
2659 int rc;
2660 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2661 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2662 {
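/* Still shared with other pages: either promote the next page to chain head (updating the handler's user arguments) or simply unlink ourselves from the middle of the chain. */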
2663 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2664 {
2665 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2666 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2667 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, MMHyperCCToR3(pVM, pNewHead),
2668 MMHyperCCToR0(pVM, pNewHead), MMHyperCCToRC(pVM, pNewHead));
2669
2670 AssertFatalRCSuccess(rc);
2671 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2672 }
2673 else
2674 {
2675 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2676 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2677 {
2678 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2679 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2680 }
2681 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2682 rc = VINF_SUCCESS;
2683 }
2684 }
2685 else
2686 {
2687 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2688 AssertFatalRC(rc);
2689 PVMCPU pVCpu = VMMGetCpu(pVM);
2690 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2691 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2692 }
2693 pPage->fMonitored = false;
2694
2695 /*
2696 * Remove it from the list of modified pages (if in it).
2697 */
2698 pgmPoolMonitorModifiedRemove(pPool, pPage);
2699
2700 return rc;
2701}
2702
2703
2704/**
2705 * Inserts the page into the list of modified pages.
2706 *
2707 * @param pPool The pool.
2708 * @param pPage The page.
2709 */
2710void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2711{
2712 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2713 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2714 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2715 && pPool->iModifiedHead != pPage->idx,
2716 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2717 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2718 pPool->iModifiedHead, pPool->cModifiedPages));
2719
2720 pPage->iModifiedNext = pPool->iModifiedHead;
2721 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2722 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2723 pPool->iModifiedHead = pPage->idx;
2724 pPool->cModifiedPages++;
2725#ifdef VBOX_WITH_STATISTICS
2726 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2727 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2728#endif
2729}
2730
2731
2732/**
2733 * Removes the page from the list of modified pages and resets the
2734 * modification counter.
2735 *
2736 * @param pPool The pool.
2737 * @param pPage The page which is believed to be in the list of modified pages.
2738 */
2739static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2740{
2741 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2742 if (pPool->iModifiedHead == pPage->idx)
2743 {
2744 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2745 pPool->iModifiedHead = pPage->iModifiedNext;
2746 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2747 {
2748 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2749 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2750 }
2751 pPool->cModifiedPages--;
2752 }
2753 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2754 {
2755 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2756 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2757 {
2758 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2759 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2760 }
2761 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2762 pPool->cModifiedPages--;
2763 }
2764 else
2765 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2766 pPage->cModifications = 0;
2767}
2768
2769
2770/**
2771 * Zaps the list of modified pages, resetting their modification counters in the process.
2772 *
2773 * @param pVM Pointer to the VM.
2774 */
2775static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2776{
2777 pgmLock(pVM);
2778 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2779 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2780
2781 unsigned cPages = 0; NOREF(cPages);
2782
2783#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2784 pgmPoolResetDirtyPages(pVM);
2785#endif
2786
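/* Detach the whole modified list up front, then walk it clearing the links and modification counters of each page. */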
2787 uint16_t idx = pPool->iModifiedHead;
2788 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2789 while (idx != NIL_PGMPOOL_IDX)
2790 {
2791 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2792 idx = pPage->iModifiedNext;
2793 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2794 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2795 pPage->cModifications = 0;
2796 Assert(++cPages);
2797 }
2798 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2799 pPool->cModifiedPages = 0;
2800 pgmUnlock(pVM);
2801}
2802
2803
2804/**
2805 * Handles SyncCR3 pool tasks.
2806 *
2807 * @returns VBox status code.
2808 * @retval VINF_SUCCESS on success.
2809 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2810 * @param pVCpu Pointer to the VMCPU.
2811 * @remark Should only be used when monitoring is available, thus placed in
2812 * the PGMPOOL_WITH_MONITORING #ifdef.
2813 */
2814int pgmPoolSyncCR3(PVMCPU pVCpu)
2815{
2816 PVM pVM = pVCpu->CTX_SUFF(pVM);
2817 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2818
2819 /*
2820 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2821 * Occasionally we will have to clear all the shadow page tables because we wanted
2822 * to monitor a page which was mapped by too many shadowed page tables. This operation
2823 * is sometimes referred to as a 'lightweight flush'.
2824 */
2825# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2826 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2827 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2828# else /* !IN_RING3 */
2829 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2830 {
2831 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2832 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2833
2834 /* Make sure all other VCPUs return to ring 3. */
2835 if (pVM->cCpus > 1)
2836 {
2837 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2838 PGM_INVL_ALL_VCPU_TLBS(pVM);
2839 }
2840 return VINF_PGM_SYNC_CR3;
2841 }
2842# endif /* !IN_RING3 */
2843 else
2844 {
2845 pgmPoolMonitorModifiedClearAll(pVM);
2846
2847 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2848 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2849 {
2850 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2851 return pgmPoolSyncCR3(pVCpu);
2852 }
2853 }
2854 return VINF_SUCCESS;
2855}
2856
2857
2858/**
2859 * Frees up at least one user entry.
2860 *
2861 * @returns VBox status code.
2862 * @retval VINF_SUCCESS if successfully freed.
2863 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2864 * @param pPool The pool.
2865 * @param iUser The user index.
2866 */
2867static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2868{
2869 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2870 /*
2871 * Just free cached pages in a braindead fashion.
2872 */
2873 /** @todo walk the age list backwards and free the first with usage. */
2874 int rc = VINF_SUCCESS;
2875 do
2876 {
2877 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2878 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2879 rc = rc2;
2880 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2881 return rc;
2882}
2883
2884
2885/**
2886 * Inserts a page into the cache.
2887 *
2888 * This will create a user node for the page, insert it into the GCPhys
2889 * hash, and insert it into the age list.
2890 *
2891 * @returns VBox status code.
2892 * @retval VINF_SUCCESS if successfully added.
2893 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2894 * @param pPool The pool.
2895 * @param pPage The cached page.
2896 * @param GCPhys The GC physical address of the page we're going to shadow.
2897 * @param iUser The user index.
2898 * @param iUserTable The user table index.
2899 */
2900DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2901{
2902 int rc = VINF_SUCCESS;
2903 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2904
2905 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2906
2907 if (iUser != NIL_PGMPOOL_IDX)
2908 {
2909#ifdef VBOX_STRICT
2910 /*
2911 * Check that the entry doesn't already exist.
2912 */
2913 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2914 {
2915 uint16_t i = pPage->iUserHead;
2916 do
2917 {
2918 Assert(i < pPool->cMaxUsers);
2919 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2920 i = paUsers[i].iNext;
2921 } while (i != NIL_PGMPOOL_USER_INDEX);
2922 }
2923#endif
2924
2925 /*
2926 * Find a free user node.
2927 */
2928 uint16_t i = pPool->iUserFreeHead;
2929 if (i == NIL_PGMPOOL_USER_INDEX)
2930 {
2931 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2932 if (RT_FAILURE(rc))
2933 return rc;
2934 i = pPool->iUserFreeHead;
2935 }
2936
2937 /*
2938 * Unlink the user node from the free list,
2939 * initialize and insert it into the user list.
2940 */
2941 pPool->iUserFreeHead = paUsers[i].iNext;
2942 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2943 paUsers[i].iUser = iUser;
2944 paUsers[i].iUserTable = iUserTable;
2945 pPage->iUserHead = i;
2946 }
2947 else
2948 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2949
2950
2951 /*
2952 * Insert into cache and enable monitoring of the guest page if enabled.
2953 *
2954 * Until we implement caching of all levels, including the CR3 one, we'll
2955 * have to make sure we don't try monitor & cache any recursive reuse of
2956 * a monitored CR3 page. Because all Windows versions do this, we'll
2957 * have to be able to do combined access monitoring, CR3 + PT and
2958 * PD + PT (guest PAE).
2959 *
2960 * Update:
2961 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2962 */
2963 const bool fCanBeMonitored = true;
2964 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2965 if (fCanBeMonitored)
2966 {
2967 rc = pgmPoolMonitorInsert(pPool, pPage);
2968 AssertRC(rc);
2969 }
2970 return rc;
2971}
2972
2973
2974/**
2975 * Adds a user reference to a page.
2976 *
2977 * This will move the page to the head of the
2978 *
2979 * @returns VBox status code.
2980 * @retval VINF_SUCCESS if successfully added.
2981 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2982 * @param pPool The pool.
2983 * @param pPage The cached page.
2984 * @param iUser The user index.
2985 * @param iUserTable The user table.
2986 */
2987static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2988{
2989 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2990 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2991 Assert(iUser != NIL_PGMPOOL_IDX);
2992
2993# ifdef VBOX_STRICT
2994 /*
2995 * Check that the entry doesn't already exist. We only allow multiple
2996 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2997 */
2998 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2999 {
3000 uint16_t i = pPage->iUserHead;
3001 do
3002 {
3003 Assert(i < pPool->cMaxUsers);
3004 /** @todo this assertion looks odd... Shouldn't it be && here? */
3005 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3006 i = paUsers[i].iNext;
3007 } while (i != NIL_PGMPOOL_USER_INDEX);
3008 }
3009# endif
3010
3011 /*
3012 * Allocate a user node.
3013 */
3014 uint16_t i = pPool->iUserFreeHead;
3015 if (i == NIL_PGMPOOL_USER_INDEX)
3016 {
3017 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3018 if (RT_FAILURE(rc))
3019 return rc;
3020 i = pPool->iUserFreeHead;
3021 }
3022 pPool->iUserFreeHead = paUsers[i].iNext;
3023
3024 /*
3025 * Initialize the user node and insert it.
3026 */
3027 paUsers[i].iNext = pPage->iUserHead;
3028 paUsers[i].iUser = iUser;
3029 paUsers[i].iUserTable = iUserTable;
3030 pPage->iUserHead = i;
3031
3032# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3033 if (pPage->fDirty)
3034 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3035# endif
3036
3037 /*
3038 * Tell the cache to update its replacement stats for this page.
3039 */
3040 pgmPoolCacheUsed(pPool, pPage);
3041 return VINF_SUCCESS;
3042}
3043
3044
3045/**
3046 * Frees a user record associated with a page.
3047 *
3048 * This does not clear the entry in the user table, it simply returns the
3049 * user record to the chain of free records.
3050 *
3051 * @param pPool The pool.
3052 * @param pPage The shadow page.
3053 * @param iUser The shadow page pool index of the user table.
3054 * @param iUserTable The index into the user table (shadowed).
3055 *
3056 * @remarks Don't call this for root pages.
3057 */
3058static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3059{
3060 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3061 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3062 Assert(iUser != NIL_PGMPOOL_IDX);
3063
3064 /*
3065 * Unlink and free the specified user entry.
3066 */
3067
3068 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3069 uint16_t i = pPage->iUserHead;
3070 if ( i != NIL_PGMPOOL_USER_INDEX
3071 && paUsers[i].iUser == iUser
3072 && paUsers[i].iUserTable == iUserTable)
3073 {
3074 pPage->iUserHead = paUsers[i].iNext;
3075
3076 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3077 paUsers[i].iNext = pPool->iUserFreeHead;
3078 pPool->iUserFreeHead = i;
3079 return;
3080 }
3081
3082 /* General: Linear search. */
3083 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3084 while (i != NIL_PGMPOOL_USER_INDEX)
3085 {
3086 if ( paUsers[i].iUser == iUser
3087 && paUsers[i].iUserTable == iUserTable)
3088 {
3089 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3090 paUsers[iPrev].iNext = paUsers[i].iNext;
3091 else
3092 pPage->iUserHead = paUsers[i].iNext;
3093
3094 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3095 paUsers[i].iNext = pPool->iUserFreeHead;
3096 pPool->iUserFreeHead = i;
3097 return;
3098 }
3099 iPrev = i;
3100 i = paUsers[i].iNext;
3101 }
3102
3103 /* Fatal: didn't find it */
3104 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3105 iUser, iUserTable, pPage->GCPhys));
3106}
3107
3108
3109/**
3110 * Gets the entry size of a shadow table.
3111 *
3112 * @param enmKind The kind of page.
3113 *
3114 * @returns The size of the entry in bytes. That is, 4 or 8.
3115 * @returns If the kind is not for a table, an assertion is raised and 0 is
3116 * returned.
3117 */
3118DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3119{
3120 switch (enmKind)
3121 {
3122 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3123 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3124 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3125 case PGMPOOLKIND_32BIT_PD:
3126 case PGMPOOLKIND_32BIT_PD_PHYS:
3127 return 4;
3128
3129 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3130 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3131 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3132 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3133 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3134 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3135 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3136 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3137 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3138 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3139 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3140 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3141 case PGMPOOLKIND_64BIT_PML4:
3142 case PGMPOOLKIND_PAE_PDPT:
3143 case PGMPOOLKIND_ROOT_NESTED:
3144 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3145 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3146 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3147 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3148 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3149 case PGMPOOLKIND_PAE_PD_PHYS:
3150 case PGMPOOLKIND_PAE_PDPT_PHYS:
3151 return 8;
3152
3153 default:
3154 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3155 }
3156}
3157
3158
3159/**
3160 * Gets the entry size of a guest table.
3161 *
3162 * @param enmKind The kind of page.
3163 *
3164 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3165 * @returns If the kind is not for a table, an assertion is raised and 0 is
3166 * returned.
3167 */
3168DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3169{
3170 switch (enmKind)
3171 {
3172 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3173 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3174 case PGMPOOLKIND_32BIT_PD:
3175 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3176 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3177 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3178 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3179 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3180 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3181 return 4;
3182
3183 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3184 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3185 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3186 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3187 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3188 case PGMPOOLKIND_64BIT_PML4:
3189 case PGMPOOLKIND_PAE_PDPT:
3190 return 8;
3191
3192 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3193 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3194 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3195 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3196 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3197 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3198 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3199 case PGMPOOLKIND_ROOT_NESTED:
3200 case PGMPOOLKIND_PAE_PD_PHYS:
3201 case PGMPOOLKIND_PAE_PDPT_PHYS:
3202 case PGMPOOLKIND_32BIT_PD_PHYS:
3203 /** @todo can we return 0? (nobody is calling this...) */
3204 AssertFailed();
3205 return 0;
3206
3207 default:
3208 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3209 }
3210}
3211
3212
3213/**
3214 * Checks one shadow page table entry for a mapping of a physical page.
3215 *
3216 * @returns true / false indicating removal of all relevant PTEs
3217 *
3218 * @param pVM Pointer to the VM.
3219 * @param pPhysPage The guest page in question.
3220 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3221 * @param iShw The shadow page table index.
3222 * @param iPte The page table entry index (must not be NIL_PGMPOOL_PHYSEXT_IDX_PTE).
3223 */
3224static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3225{
3226 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3227 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3228 bool fRet = false;
3229
3230 /*
3231 * Assert sanity.
3232 */
3233 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3234 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3235 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3236
3237 /*
3238 * Then, clear the actual mappings to the page in the shadow PT.
3239 */
3240 switch (pPage->enmKind)
3241 {
3242 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3243 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3244 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3245 {
3246 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3247 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3248 uint32_t u32AndMask = 0;
3249 uint32_t u32OrMask = 0;
3250
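            /* When the caller permits it (fFlushPTEs == false), the PTE is kept and only
               its R/W bit is adjusted to match the current handler state; a zero AndMask
               below means the entry gets cleared completely instead. */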
3251 if (!fFlushPTEs)
3252 {
3253 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3254 {
3255 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3256 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3257 u32OrMask = X86_PTE_RW;
3258 u32AndMask = UINT32_MAX;
3259 fRet = true;
3260 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3261 break;
3262
3263 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3264 u32OrMask = 0;
3265 u32AndMask = ~X86_PTE_RW;
3266 fRet = true;
3267 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3268 break;
3269 default:
3270 /* (shouldn't be here, will assert below) */
3271 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3272 break;
3273 }
3274 }
3275 else
3276 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3277
3278 /* Update the counter if we're removing references. */
3279 if (!u32AndMask)
3280 {
3281 Assert(pPage->cPresent);
3282 Assert(pPool->cPresent);
3283 pPage->cPresent--;
3284 pPool->cPresent--;
3285 }
3286
3287 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3288 {
3289 X86PTE Pte;
3290
3291 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3292 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3293 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3294 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3295
3296 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3297 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3298 return fRet;
3299 }
3300#ifdef LOG_ENABLED
3301 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3302 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3303 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3304 {
3305 Log(("i=%d cFound=%d\n", i, ++cFound));
3306 }
3307#endif
3308 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3309 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3310 break;
3311 }
3312
3313 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3314 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3315 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3316 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3317 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3318 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3319 {
3320 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3321 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3322 uint64_t u64OrMask = 0;
3323 uint64_t u64AndMask = 0;
3324
3325 if (!fFlushPTEs)
3326 {
3327 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3328 {
3329 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3330 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3331 u64OrMask = X86_PTE_RW;
3332 u64AndMask = UINT64_MAX;
3333 fRet = true;
3334 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3335 break;
3336
3337 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3338 u64OrMask = 0;
3339 u64AndMask = ~(uint64_t)X86_PTE_RW;
3340 fRet = true;
3341 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3342 break;
3343
3344 default:
3345 /* (shouldn't be here, will assert below) */
3346 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3347 break;
3348 }
3349 }
3350 else
3351 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3352
3353 /* Update the counter if we're removing references. */
3354 if (!u64AndMask)
3355 {
3356 Assert(pPage->cPresent);
3357 Assert(pPool->cPresent);
3358 pPage->cPresent--;
3359 pPool->cPresent--;
3360 }
3361
3362 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3363 {
3364 X86PTEPAE Pte;
3365
3366 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3367 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3368 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3369 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3370
3371 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3372 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3373 return fRet;
3374 }
3375#ifdef LOG_ENABLED
3376 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3377 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3378 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3379 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3380 Log(("i=%d cFound=%d\n", i, ++cFound));
3381#endif
3382 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3383 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3384 break;
3385 }
3386
3387#ifdef PGM_WITH_LARGE_PAGES
3388 /* Large page case only. */
3389 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3390 {
3391 Assert(pVM->pgm.s.fNestedPaging);
3392
3393 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3394 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3395
3396 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3397 {
3398 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3399 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3400 pPD->a[iPte].u = 0;
3401 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3402
3403 /* Update the counter as we're removing references. */
3404 Assert(pPage->cPresent);
3405 Assert(pPool->cPresent);
3406 pPage->cPresent--;
3407 pPool->cPresent--;
3408
3409 return fRet;
3410 }
3411# ifdef LOG_ENABLED
3412 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3413 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3414 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3415 Log(("i=%d cFound=%d\n", i, ++cFound));
3416# endif
3417 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3418 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3419 break;
3420 }
3421
3422 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3423 case PGMPOOLKIND_PAE_PD_PHYS:
3424 {
3425 Assert(pVM->pgm.s.fNestedPaging);
3426
3427 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3428 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3429
3430 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3431 {
3432 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3433 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3434 pPD->a[iPte].u = 0;
3435 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3436
3437 /* Update the counter as we're removing references. */
3438 Assert(pPage->cPresent);
3439 Assert(pPool->cPresent);
3440 pPage->cPresent--;
3441 pPool->cPresent--;
3442 return fRet;
3443 }
3444# ifdef LOG_ENABLED
3445 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3446 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3447 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3448 Log(("i=%d cFound=%d\n", i, ++cFound));
3449# endif
3450 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3451 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3452 break;
3453 }
3454#endif /* PGM_WITH_LARGE_PAGES */
3455
3456 default:
3457 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3458 }
3459
3460 /* not reached. */
3461#ifndef _MSC_VER
3462 return fRet;
3463#endif
3464}
3465
3466
3467/**
3468 * Scans one shadow page table for mappings of a physical page.
3469 *
3470 * @param pVM Pointer to the VM.
3471 * @param pPhysPage The guest page in question.
3472 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3473 * @param iShw The shadow page table index.
3474 */
3475static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3476{
3477 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3478
3479 /* We should only come here when there's only one reference to this physical page. */
3480 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3481
3482 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3483 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3484 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3485 if (!fKeptPTEs)
3486 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3487 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3488}
3489
3490
3491/**
3492 * Flushes a list of shadow page tables mapping the same physical page.
3493 *
3494 * @param pVM Pointer to the VM.
3495 * @param pPhysPage The guest page in question.
3496 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3497 * @param iPhysExt The physical cross reference extent list to flush.
3498 */
3499static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3500{
3501 PGM_LOCK_ASSERT_OWNER(pVM);
3502 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3503 bool fKeepList = false;
3504
3505 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3506 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3507
3508 const uint16_t iPhysExtStart = iPhysExt;
3509 PPGMPOOLPHYSEXT pPhysExt;
3510 do
3511 {
3512 Assert(iPhysExt < pPool->cMaxPhysExts);
3513 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3514 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3515 {
3516 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3517 {
3518 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3519 if (!fKeptPTEs)
3520 {
3521 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3522 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3523 }
3524 else
3525 fKeepList = true;
3526 }
3527 }
3528 /* next */
3529 iPhysExt = pPhysExt->iNext;
3530 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3531
3532 if (!fKeepList)
3533 {
3534 /* insert the list into the free list and clear the ram range entry. */
3535 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3536 pPool->iPhysExtFreeHead = iPhysExtStart;
3537 /* Invalidate the tracking data. */
3538 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3539 }
3540
3541 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3542}
3543
3544
3545/**
3546 * Flushes all shadow page table mappings of the given guest page.
3547 *
3548 * This is typically called when the host page backing the guest one has been
3549 * replaced or when the page protection was changed due to a guest access
3550 * caught by the monitoring.
3551 *
3552 * @returns VBox status code.
3553 * @retval VINF_SUCCESS if all references have been successfully cleared.
3554 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3555 * pool cleaning. FF and sync flags are set.
3556 *
3557 * @param pVM Pointer to the VM.
3558 * @param GCPhysPage GC physical address of the page in question
3559 * @param pPhysPage The guest page in question.
3560 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3561 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3562 * flushed; it is NOT touched if this isn't necessary. The
3563 * caller MUST initialize this to @a false.
3564 */
3565int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3566{
3567 PVMCPU pVCpu = VMMGetCpu(pVM);
3568 pgmLock(pVM);
3569 int rc = VINF_SUCCESS;
3570
3571#ifdef PGM_WITH_LARGE_PAGES
3572 /* Is this page part of a large page? */
3573 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3574 {
3575 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3576 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3577
3578 /* Fetch the large page base. */
3579 PPGMPAGE pLargePage;
3580 if (GCPhysBase != GCPhysPage)
3581 {
3582 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3583 AssertFatal(pLargePage);
3584 }
3585 else
3586 pLargePage = pPhysPage;
3587
3588 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3589
3590 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3591 {
3592 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3593 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3594 pVM->pgm.s.cLargePagesDisabled++;
3595
3596 /* Update the base page as *only* that one has a reference and there's only one PDE to clear. */
3597 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3598
3599 *pfFlushTLBs = true;
3600 pgmUnlock(pVM);
3601 return rc;
3602 }
3603 }
3604#else
3605 NOREF(GCPhysPage);
3606#endif /* PGM_WITH_LARGE_PAGES */
3607
3608 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
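    /* The tracking word either encodes a single direct reference (cRefs == 1 plus the
       shadow page table index) or the index of a physical cross reference extent list
       (cRefs == PGMPOOL_TD_CREFS_PHYSEXT); the special overflowed index means the
       references are no longer tracked individually and the slow full scan is used. */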
3609 if (u16)
3610 {
3611 /*
3612 * The zero page is currently screwing up the tracking and we'll
3613 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3614 * is defined, zero pages won't normally be mapped. Some kind of solution
3615 * will be needed for this problem of course, but it will have to wait...
3616 */
3617 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3618 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3619 rc = VINF_PGM_GCPHYS_ALIASED;
3620 else
3621 {
3622# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3623 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3624 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3625 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3626# endif
3627
3628 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3629 {
3630 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3631 pgmPoolTrackFlushGCPhysPT(pVM,
3632 pPhysPage,
3633 fFlushPTEs,
3634 PGMPOOL_TD_GET_IDX(u16));
3635 }
3636 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3637 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3638 else
3639 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3640 *pfFlushTLBs = true;
3641
3642# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3643 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3644# endif
3645 }
3646 }
3647
3648 if (rc == VINF_PGM_GCPHYS_ALIASED)
3649 {
3650 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3651 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3652 rc = VINF_PGM_SYNC_CR3;
3653 }
3654 pgmUnlock(pVM);
3655 return rc;
3656}
3657
3658
3659/**
3660 * Scans all shadow page tables for mappings of a physical page.
3661 *
3662 * This may be slow, but it's most likely more efficient than cleaning
3663 * out the entire page pool / cache.
3664 *
3665 * @returns VBox status code.
3666 * @retval VINF_SUCCESS if all references have been successfully cleared.
3667 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3668 * a page pool cleaning.
3669 *
3670 * @param pVM Pointer to the VM.
3671 * @param pPhysPage The guest page in question.
3672 */
3673int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3674{
3675 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3676 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3677 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3678 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3679
3680 /*
3681 * There is a limit to what makes sense.
3682 */
3683 if ( pPool->cPresent > 1024
3684 && pVM->cCpus == 1)
3685 {
3686 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3687 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3688 return VINF_PGM_GCPHYS_ALIASED;
3689 }
3690
3691 /*
3692 * Iterate all the pages until we've encountered all those in use.
3693 * This is a simple but not quite optimal solution.
3694 */
3695 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3696 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3697 unsigned cLeft = pPool->cUsedPages;
3698 unsigned iPage = pPool->cCurPages;
3699 while (--iPage >= PGMPOOL_IDX_FIRST)
3700 {
3701 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3702 if ( pPage->GCPhys != NIL_RTGCPHYS
3703 && pPage->cPresent)
3704 {
3705 switch (pPage->enmKind)
3706 {
3707 /*
3708 * We only care about shadow page tables.
3709 */
3710 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3711 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3712 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3713 {
3714 unsigned cPresent = pPage->cPresent;
3715 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3716 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3717 if (pPT->a[i].n.u1Present)
3718 {
3719 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3720 {
3721 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3722 pPT->a[i].u = 0;
3723
3724 /* Update the counter as we're removing references. */
3725 Assert(pPage->cPresent);
3726 Assert(pPool->cPresent);
3727 pPage->cPresent--;
3728 pPool->cPresent--;
3729 }
3730 if (!--cPresent)
3731 break;
3732 }
3733 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3734 break;
3735 }
3736
3737 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3738 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3739 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3740 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3741 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3742 {
3743 unsigned cPresent = pPage->cPresent;
3744 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3745 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3746 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3747 {
3748 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3749 {
3750 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3751 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3752
3753 /* Update the counter as we're removing references. */
3754 Assert(pPage->cPresent);
3755 Assert(pPool->cPresent);
3756 pPage->cPresent--;
3757 pPool->cPresent--;
3758 }
3759 if (!--cPresent)
3760 break;
3761 }
3762 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3763 break;
3764 }
3765#ifndef IN_RC
3766 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3767 {
3768 unsigned cPresent = pPage->cPresent;
3769 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3770 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3771 if (pPT->a[i].n.u1Present)
3772 {
3773 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3774 {
3775 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3776 pPT->a[i].u = 0;
3777
3778 /* Update the counter as we're removing references. */
3779 Assert(pPage->cPresent);
3780 Assert(pPool->cPresent);
3781 pPage->cPresent--;
3782 pPool->cPresent--;
3783 }
3784 if (!--cPresent)
3785 break;
3786 }
3787 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3788 break;
3789 }
3790#endif
3791 }
3792 if (!--cLeft)
3793 break;
3794 }
3795 }
3796
3797 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3798 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3799
3800 /*
3801 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3802 */
3803 if (pPool->cPresent > 1024)
3804 {
3805 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3806 return VINF_PGM_GCPHYS_ALIASED;
3807 }
3808
3809 return VINF_SUCCESS;
3810}
3811
3812
3813/**
3814 * Clears the user entry in a user table.
3815 *
3816 * This is used to remove all references to a page when flushing it.
3817 */
3818static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3819{
3820 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3821 Assert(pUser->iUser < pPool->cCurPages);
3822 uint32_t iUserTable = pUser->iUserTable;
3823
3824 /*
3825 * Map the user page. Ignore references made by fictitious pages.
3826 */
3827 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3828 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3829 union
3830 {
3831 uint64_t *pau64;
3832 uint32_t *pau32;
3833 } u;
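    /* The user table entries are accessed as either 32-bit or 64-bit values,
       depending on the kind of the owning (user) page; see the switches below. */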
3834 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3835 {
3836 Assert(!pUserPage->pvPageR3);
3837 return;
3838 }
3839 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3840
3841
3842 /* Safety precaution in case we change the paging for other modes too in the future. */
3843 Assert(!pgmPoolIsPageLocked(pPage));
3844
3845#ifdef VBOX_STRICT
3846 /*
3847 * Some sanity checks.
3848 */
3849 switch (pUserPage->enmKind)
3850 {
3851 case PGMPOOLKIND_32BIT_PD:
3852 case PGMPOOLKIND_32BIT_PD_PHYS:
3853 Assert(iUserTable < X86_PG_ENTRIES);
3854 break;
3855 case PGMPOOLKIND_PAE_PDPT:
3856 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3857 case PGMPOOLKIND_PAE_PDPT_PHYS:
3858 Assert(iUserTable < 4);
3859 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3860 break;
3861 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3862 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3863 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3864 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3865 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3866 case PGMPOOLKIND_PAE_PD_PHYS:
3867 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3868 break;
3869 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3870 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3871 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3872 break;
3873 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3874 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3875 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3876 break;
3877 case PGMPOOLKIND_64BIT_PML4:
3878 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3879 /* GCPhys >> PAGE_SHIFT is the index here */
3880 break;
3881 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3882 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3883 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3884 break;
3885
3886 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3887 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3888 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3889 break;
3890
3891 case PGMPOOLKIND_ROOT_NESTED:
3892 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3893 break;
3894
3895 default:
3896 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3897 break;
3898 }
3899#endif /* VBOX_STRICT */
3900
3901 /*
3902 * Clear the entry in the user page.
3903 */
3904 switch (pUserPage->enmKind)
3905 {
3906 /* 32-bit entries */
3907 case PGMPOOLKIND_32BIT_PD:
3908 case PGMPOOLKIND_32BIT_PD_PHYS:
3909 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3910 break;
3911
3912 /* 64-bit entries */
3913 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3914 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3915 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3916 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3917 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3918#ifdef IN_RC
3919 /*
3920 * In 32-bit PAE mode we *must* invalidate the TLB when changing a
3921 * PDPT entry; the CPU fetches them only during cr3 load, so any
3922 * non-present PDPT will continue to cause page faults.
3923 */
3924 ASMReloadCR3();
3925 /* no break */
3926#endif
3927 case PGMPOOLKIND_PAE_PD_PHYS:
3928 case PGMPOOLKIND_PAE_PDPT_PHYS:
3929 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3930 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3931 case PGMPOOLKIND_64BIT_PML4:
3932 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3933 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3934 case PGMPOOLKIND_PAE_PDPT:
3935 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3936 case PGMPOOLKIND_ROOT_NESTED:
3937 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3938 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3939 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3940 break;
3941
3942 default:
3943 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3944 }
3945 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3946}
3947
3948
3949/**
3950 * Clears all users of a page.
3951 */
3952static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3953{
3954 /*
3955 * Free all the user records.
3956 */
3957 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3958
3959 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3960 uint16_t i = pPage->iUserHead;
3961 while (i != NIL_PGMPOOL_USER_INDEX)
3962 {
3963 /* Clear entry in user table. */
3964 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3965
3966 /* Free it. */
3967 const uint16_t iNext = paUsers[i].iNext;
3968 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3969 paUsers[i].iNext = pPool->iUserFreeHead;
3970 pPool->iUserFreeHead = i;
3971
3972 /* Next. */
3973 i = iNext;
3974 }
3975 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3976}
3977
3978
3979/**
3980 * Allocates a new physical cross reference extent.
3981 *
3982 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3983 * @param pVM Pointer to the VM.
3984 * @param piPhysExt Where to store the phys ext index.
3985 */
3986PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3987{
3988 PGM_LOCK_ASSERT_OWNER(pVM);
3989 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3990 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3991 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3992 {
3993 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3994 return NULL;
3995 }
3996 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3997 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3998 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3999 *piPhysExt = iPhysExt;
4000 return pPhysExt;
4001}
4002
4003
4004/**
4005 * Frees a physical cross reference extent.
4006 *
4007 * @param pVM Pointer to the VM.
4008 * @param iPhysExt The extent to free.
4009 */
4010void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
4011{
4012 PGM_LOCK_ASSERT_OWNER(pVM);
4013 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4014 Assert(iPhysExt < pPool->cMaxPhysExts);
4015 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4016 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4017 {
4018 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4019 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4020 }
4021 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4022 pPool->iPhysExtFreeHead = iPhysExt;
4023}
4024
4025
4026/**
4027 * Frees a list of physical cross reference extents.
4028 *
4029 * @param pVM Pointer to the VM.
4030 * @param iPhysExt The index of the first extent in the list to free.
4031 */
4032void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
4033{
4034 PGM_LOCK_ASSERT_OWNER(pVM);
4035 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4036
4037 const uint16_t iPhysExtStart = iPhysExt;
4038 PPGMPOOLPHYSEXT pPhysExt;
4039 do
4040 {
4041 Assert(iPhysExt < pPool->cMaxPhysExts);
4042 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4043 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4044 {
4045 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4046 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4047 }
4048
4049 /* next */
4050 iPhysExt = pPhysExt->iNext;
4051 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4052
4053 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4054 pPool->iPhysExtFreeHead = iPhysExtStart;
4055}
4056
4057
4058/**
4059 * Insert a reference into a list of physical cross reference extents.
4060 *
4061 * @returns The new tracking data for PGMPAGE.
4062 *
4063 * @param pVM Pointer to the VM.
4064 * @param iPhysExt The physical extent index of the list head.
4065 * @param iShwPT The shadow page table index.
4066 * @param iPte Page table entry
4067 *
4068 */
4069static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4070{
4071 PGM_LOCK_ASSERT_OWNER(pVM);
4072 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4073 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4074
4075 /*
4076 * Special common cases.
4077 */
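    /* Slot 0 of the head extent was taken when the list was created, so the common
       case is that slot 1 or 2 is still free and can be used directly. */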
4078 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4079 {
4080 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4081 paPhysExts[iPhysExt].apte[1] = iPte;
4082 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4083 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4084 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4085 }
4086 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4087 {
4088 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4089 paPhysExts[iPhysExt].apte[2] = iPte;
4090 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4091 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4092 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4093 }
4094 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4095
4096 /*
4097 * General treatment.
4098 */
4099 const uint16_t iPhysExtStart = iPhysExt;
4100 unsigned cMax = 15;
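    /* Walk the existing extents looking for a free slot, but cap the walk; overly long
       chains are abandoned and the page is marked as overflowed so that it is handled
       by the slow flush path instead. */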
4101 for (;;)
4102 {
4103 Assert(iPhysExt < pPool->cMaxPhysExts);
4104 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4105 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4106 {
4107 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4108 paPhysExts[iPhysExt].apte[i] = iPte;
4109 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4110 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4111 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4112 }
4113 if (!--cMax)
4114 {
4115 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4116 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4117 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4118 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4119 }
4120
4121 /* advance */
4122 iPhysExt = paPhysExts[iPhysExt].iNext;
4123 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4124 break;
4125 }
4126
4127 /*
4128 * Add another extent to the list.
4129 */
4130 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4131 if (!pNew)
4132 {
4133 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4134 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4135 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4136 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4137 }
4138 pNew->iNext = iPhysExtStart;
4139 pNew->aidx[0] = iShwPT;
4140 pNew->apte[0] = iPte;
4141 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4142 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4143}
4144
4145
4146/**
4147 * Add a reference to a guest physical page where extents are in use.
4148 *
4149 * @returns The new tracking data for PGMPAGE.
4150 *
4151 * @param pVM Pointer to the VM.
4152 * @param pPhysPage Pointer to the aPages entry in the ram range.
4153 * @param u16 The ram range flags (top 16-bits).
4154 * @param iShwPT The shadow page table index.
4155 * @param iPte Page table entry
4156 */
4157uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4158{
4159 pgmLock(pVM);
4160 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4161 {
4162 /*
4163 * Convert to extent list.
4164 */
4165 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4166 uint16_t iPhysExt;
4167 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4168 if (pPhysExt)
4169 {
4170 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4171 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4172 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4173 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4174 pPhysExt->aidx[1] = iShwPT;
4175 pPhysExt->apte[1] = iPte;
4176 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4177 }
4178 else
4179 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4180 }
4181 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4182 {
4183 /*
4184 * Insert into the extent list.
4185 */
4186 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4187 }
4188 else
4189 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4190 pgmUnlock(pVM);
4191 return u16;
4192}
4193
4194
4195/**
4196 * Clear references to guest physical memory.
4197 *
4198 * @param pPool The pool.
4199 * @param pPage The page.
4200 * @param pPhysPage Pointer to the aPages entry in the ram range.
4201 * @param iPte Shadow PTE index
4202 */
4203void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4204{
4205 PVM pVM = pPool->CTX_SUFF(pVM);
4206 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4207 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4208
4209 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4210 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4211 {
4212 pgmLock(pVM);
4213
4214 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4215 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4216 do
4217 {
4218 Assert(iPhysExt < pPool->cMaxPhysExts);
4219
4220 /*
4221 * Look for the shadow page and check if it's all freed.
4222 */
4223 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4224 {
4225 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4226 && paPhysExts[iPhysExt].apte[i] == iPte)
4227 {
4228 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4229 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4230
4231 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4232 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4233 {
4234 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4235 pgmUnlock(pVM);
4236 return;
4237 }
4238
4239 /* we can free the node. */
4240 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4241 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4242 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4243 {
4244 /* lonely node */
4245 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4246 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4247 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4248 }
4249 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4250 {
4251 /* head */
4252 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4253 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4254 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4255 }
4256 else
4257 {
4258 /* in list */
4259 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4260 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4261 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4262 }
4263 iPhysExt = iPhysExtNext;
4264 pgmUnlock(pVM);
4265 return;
4266 }
4267 }
4268
4269 /* next */
4270 iPhysExtPrev = iPhysExt;
4271 iPhysExt = paPhysExts[iPhysExt].iNext;
4272 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4273
4274 pgmUnlock(pVM);
4275 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4276 }
4277 else /* nothing to do */
4278 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4279}
4280
4281/**
4282 * Clear references to guest physical memory.
4283 *
4284 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4285 * physical address is assumed to be correct, so the linear search can be
4286 * skipped and we can assert at an earlier point.
4287 *
4288 * @param pPool The pool.
4289 * @param pPage The page.
4290 * @param HCPhys The host physical address corresponding to the guest page.
4291 * @param GCPhys The guest physical address corresponding to HCPhys.
4292 * @param iPte Shadow PTE index
4293 */
4294static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4295{
4296 /*
4297 * Lookup the page and check if it checks out before derefing it.
4298 */
4299 PVM pVM = pPool->CTX_SUFF(pVM);
4300 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4301 if (pPhysPage)
4302 {
4303 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4304#ifdef LOG_ENABLED
4305 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4306 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4307#endif
4308 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4309 {
4310 Assert(pPage->cPresent);
4311 Assert(pPool->cPresent);
4312 pPage->cPresent--;
4313 pPool->cPresent--;
4314 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4315 return;
4316 }
4317
4318 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4319 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4320 }
4321 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4322}
4323
4324
4325/**
4326 * Clear references to guest physical memory.
4327 *
4328 * @param pPool The pool.
4329 * @param pPage The page.
4330 * @param HCPhys The host physical address corresponding to the guest page.
4331 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4332 * @param iPte Shadow pte index
4333 */
4334void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4335{
4336 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4337
4338 /*
4339 * Try the hint first.
4340 */
4341 RTHCPHYS HCPhysHinted;
4342 PVM pVM = pPool->CTX_SUFF(pVM);
4343 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4344 if (pPhysPage)
4345 {
4346 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4347 Assert(HCPhysHinted);
4348 if (HCPhysHinted == HCPhys)
4349 {
4350 Assert(pPage->cPresent);
4351 Assert(pPool->cPresent);
4352 pPage->cPresent--;
4353 pPool->cPresent--;
4354 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4355 return;
4356 }
4357 }
4358 else
4359 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4360
4361 /*
4362 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4363 */
4364 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4365 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4366 while (pRam)
4367 {
4368 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4369 while (iPage-- > 0)
4370 {
4371 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4372 {
4373 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4374 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4375 Assert(pPage->cPresent);
4376 Assert(pPool->cPresent);
4377 pPage->cPresent--;
4378 pPool->cPresent--;
4379 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4380 return;
4381 }
4382 }
4383 pRam = pRam->CTX_SUFF(pNext);
4384 }
4385
4386 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4387}
4388
4389
4390/**
4391 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4392 *
4393 * @param pPool The pool.
4394 * @param pPage The page.
4395 * @param pShwPT The shadow page table (mapping of the page).
4396 * @param pGstPT The guest page table.
4397 */
4398DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4399{
4400 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4401 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4402 {
4403 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4404 if (pShwPT->a[i].n.u1Present)
4405 {
4406 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4407 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4408 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4409 if (!pPage->cPresent)
4410 break;
4411 }
4412 }
4413}
4414
4415
4416/**
4417 * Clear references to guest physical memory in a PAE / 32-bit page table.
4418 *
4419 * @param pPool The pool.
4420 * @param pPage The page.
4421 * @param pShwPT The shadow page table (mapping of the page).
4422 * @param pGstPT The guest page table (just a half one).
4423 */
4424DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4425{
4426 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4427 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4428 {
4429 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4430 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4431 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4432 {
4433 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4434 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4435 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4436 if (!pPage->cPresent)
4437 break;
4438 }
4439 }
4440}
4441
4442
4443/**
4444 * Clear references to guest physical memory in a PAE / PAE page table.
4445 *
4446 * @param pPool The pool.
4447 * @param pPage The page.
4448 * @param pShwPT The shadow page table (mapping of the page).
4449 * @param pGstPT The guest page table.
4450 */
4451DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4452{
4453 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4454 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4455 {
4456 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4457 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4458 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4459 {
4460 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4461 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4462 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4463 if (!pPage->cPresent)
4464 break;
4465 }
4466 }
4467}
4468
4469
4470/**
4471 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4472 *
4473 * @param pPool The pool.
4474 * @param pPage The page.
4475 * @param pShwPT The shadow page table (mapping of the page).
4476 */
4477DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4478{
4479 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4480 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4481 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4482 {
4483 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4484 if (pShwPT->a[i].n.u1Present)
4485 {
4486 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4487 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4488 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4489 if (!pPage->cPresent)
4490 break;
4491 }
4492 }
4493}
4494
4495
4496/**
4497 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4498 *
4499 * @param pPool The pool.
4500 * @param pPage The page.
4501 * @param pShwPT The shadow page table (mapping of the page).
4502 */
4503DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4504{
4505 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4506 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4507 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4508 {
4509 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4510 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4511 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4512 {
4513 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4514 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4515 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4516 if (!pPage->cPresent)
4517 break;
4518 }
4519 }
4520}
4521
4522
4523/**
4524 * Clear references to shadowed pages in an EPT page table.
4525 *
4526 * @param pPool The pool.
4527 * @param pPage The page.
4528 * @param pShwPT The shadow page table (mapping of the page).
4529 */
4530DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4531{
4532 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4533 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4534 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4535 {
4536 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4537 if (pShwPT->a[i].n.u1Present)
4538 {
4539 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4540 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4541 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4542 if (!pPage->cPresent)
4543 break;
4544 }
4545 }
4546}
4547
4548
4549/**
4550 * Clear references to shadowed pages in a 32-bit page directory.
4551 *
4552 * @param pPool The pool.
4553 * @param pPage The page.
4554 * @param pShwPD The shadow page directory (mapping of the page).
4555 */
4556DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4557{
4558 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4559 {
4560 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4561 if ( pShwPD->a[i].n.u1Present
4562 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4563 )
4564 {
4565 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4566 if (pSubPage)
4567 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4568 else
4569 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4570 }
4571 }
4572}
4573
4574
4575/**
4576 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4577 *
4578 * @param pPool The pool.
4579 * @param pPage The page.
4580 * @param pShwPD The shadow page directory (mapping of the page).
4581 */
4582DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4583{
4584 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4585 {
4586 if ( pShwPD->a[i].n.u1Present
4587 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4588 {
4589#ifdef PGM_WITH_LARGE_PAGES
4590 if (pShwPD->a[i].b.u1Size)
4591 {
4592 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4593 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4594 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4595 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4596 i);
4597 }
4598 else
4599#endif
4600 {
4601 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4602 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4603 if (pSubPage)
4604 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4605 else
4606 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4607 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4608 }
4609 }
4610 }
4611}
4612
4613
4614/**
4615 * Clear references to shadowed pages in a PAE page directory pointer table.
4616 *
4617 * @param pPool The pool.
4618 * @param pPage The page.
4619 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4620 */
4621DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4622{
4623 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4624 {
4625 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4626 if ( pShwPDPT->a[i].n.u1Present
4627 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4628 )
4629 {
4630 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4631 if (pSubPage)
4632 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4633 else
4634 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4635 }
4636 }
4637}
4638
4639
4640/**
4641 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4642 *
4643 * @param pPool The pool.
4644 * @param pPage The page.
4645 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4646 */
4647DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4648{
4649 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4650 {
4651 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4652 if (pShwPDPT->a[i].n.u1Present)
4653 {
4654 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4655 if (pSubPage)
4656 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4657 else
4658 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4659 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4660 }
4661 }
4662}
4663
4664
4665/**
4666 * Clear references to shadowed pages in a 64-bit level 4 page table.
4667 *
4668 * @param pPool The pool.
4669 * @param pPage The page.
4670 * @param pShwPML4 The shadow page map level 4 table (mapping of the page).
4671 */
4672DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4673{
4674 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4675 {
4676 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4677 if (pShwPML4->a[i].n.u1Present)
4678 {
4679 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4680 if (pSubPage)
4681 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4682 else
4683 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4684 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4685 }
4686 }
4687}
4688
4689
4690/**
4691 * Clear references to shadowed pages in an EPT page directory.
4692 *
4693 * @param pPool The pool.
4694 * @param pPage The page.
4695 * @param pShwPD The shadow page directory (mapping of the page).
4696 */
4697DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4698{
4699 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4700 {
4701 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4702 if (pShwPD->a[i].n.u1Present)
4703 {
4704#ifdef PGM_WITH_LARGE_PAGES
4705 if (pShwPD->a[i].b.u1Size)
4706 {
4707 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4708 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4709 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4710 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4711 i);
4712 }
4713 else
4714#endif
4715 {
4716 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4717 if (pSubPage)
4718 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4719 else
4720 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4721 }
4722 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4723 }
4724 }
4725}
4726
4727
4728/**
4729 * Clear references to shadowed pages in an EPT page directory pointer table.
4730 *
4731 * @param pPool The pool.
4732 * @param pPage The page.
4733 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4734 */
4735DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4736{
4737 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4738 {
4739 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4740 if (pShwPDPT->a[i].n.u1Present)
4741 {
4742 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4743 if (pSubPage)
4744 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4745 else
4746 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4747 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4748 }
4749 }
4750}
4751
4752
4753/**
4754 * Clears all references made by this page.
4755 *
4756 * This includes other shadow pages and GC physical addresses.
4757 *
4758 * @param pPool The pool.
4759 * @param pPage The page.
4760 */
4761static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4762{
4763 /*
4764 * Map the shadow page and take action according to the page kind.
4765 */
4766 PVM pVM = pPool->CTX_SUFF(pVM);
4767 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4768 switch (pPage->enmKind)
4769 {
4770 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4771 {
4772 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4773 void *pvGst;
4774 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4775 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4776 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4777 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4778 break;
4779 }
4780
4781 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4782 {
4783 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4784 void *pvGst;
4785 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4786 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4787 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4788 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4789 break;
4790 }
4791
4792 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4793 {
4794 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4795 void *pvGst;
4796 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4797 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4798 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4799 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4800 break;
4801 }
4802
4803 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4804 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4805 {
4806 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4807 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4808 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4809 break;
4810 }
4811
4812 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4813 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4814 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4815 {
4816 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4817 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4818 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4819 break;
4820 }
4821
4822 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4823 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4824 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4825 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4826 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4827 case PGMPOOLKIND_PAE_PD_PHYS:
4828 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4829 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4830 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4831 break;
4832
4833 case PGMPOOLKIND_32BIT_PD_PHYS:
4834 case PGMPOOLKIND_32BIT_PD:
4835 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4836 break;
4837
4838 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4839 case PGMPOOLKIND_PAE_PDPT:
4840 case PGMPOOLKIND_PAE_PDPT_PHYS:
4841 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4842 break;
4843
4844 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4845 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4846 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4847 break;
4848
4849 case PGMPOOLKIND_64BIT_PML4:
4850 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4851 break;
4852
4853 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4854 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4855 break;
4856
4857 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4858 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4859 break;
4860
4861 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4862 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4863 break;
4864
4865 default:
4866 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4867 }
4868
4869 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4870 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4871 ASMMemZeroPage(pvShw);
4872 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4873 pPage->fZeroed = true;
4874 Assert(!pPage->cPresent);
4875 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4876}
4877
4878
4879/**
4880 * Flushes a pool page.
4881 *
4882 * This moves the page to the free list after removing all user references to it.
4883 *
4884 * @returns VBox status code.
4885 * @retval VINF_SUCCESS on success.
4886 * @param pPool The pool.
4887 * @param pPage The shadow page to flush.
4888 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!)
4889 */
4890int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4891{
4892 PVM pVM = pPool->CTX_SUFF(pVM);
4893 bool fFlushRequired = false;
4894
4895 int rc = VINF_SUCCESS;
4896 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4897 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4898 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4899
4900 /*
4901 * Reject any attempts at flushing any of the special root pages (shall
4902 * not happen).
4903 */
4904 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4905 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4906 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4907 VINF_SUCCESS);
4908
4909 pgmLock(pVM);
4910
4911 /*
4912 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4913 */
4914 if (pgmPoolIsPageLocked(pPage))
4915 {
4916 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4917 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4918 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4919 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4920 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4921 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4922 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4923 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4924 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4925 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4926 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4927 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4928 pgmUnlock(pVM);
4929 return VINF_SUCCESS;
4930 }
4931
4932#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4933 /* Start a subset so we won't run out of mapping space. */
4934 PVMCPU pVCpu = VMMGetCpu(pVM);
4935 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4936#endif
4937
4938 /*
4939 * Mark the page as being in need of an ASMMemZeroPage().
4940 */
4941 pPage->fZeroed = false;
4942
4943#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4944 if (pPage->fDirty)
4945 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4946#endif
4947
4948 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4949 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4950 fFlushRequired = true;
4951
4952 /*
4953 * Clear the page.
4954 */
4955 pgmPoolTrackClearPageUsers(pPool, pPage);
4956 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4957 pgmPoolTrackDeref(pPool, pPage);
4958 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4959
4960 /*
4961 * Flush it from the cache.
4962 */
4963 pgmPoolCacheFlushPage(pPool, pPage);
4964
4965#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4966 /* Heavy stuff done. */
4967 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4968#endif
4969
4970 /*
4971 * Deregister the monitoring.
4972 */
4973 if (pPage->fMonitored)
4974 rc = pgmPoolMonitorFlush(pPool, pPage);
4975
4976 /*
4977 * Free the page.
4978 */
4979 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4980 pPage->iNext = pPool->iFreeHead;
4981 pPool->iFreeHead = pPage->idx;
4982 pPage->enmKind = PGMPOOLKIND_FREE;
4983 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4984 pPage->GCPhys = NIL_RTGCPHYS;
4985 pPage->fReusedFlushPending = false;
4986
4987 pPool->cUsedPages--;
4988
4989 /* Flush the TLBs of all VCPUs if required. */
4990 if ( fFlushRequired
4991 && fFlush)
4992 {
4993 PGM_INVL_ALL_VCPU_TLBS(pVM);
4994 }
4995
4996 pgmUnlock(pVM);
4997 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4998 return rc;
4999}
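
/*
 * Usage sketch: a minimal, hypothetical example of how a caller that flushes
 * several pool pages in a row might pass fFlush=false to defer the TLB
 * shoot-down and issue one combined flush afterwards.  The apPages array and
 * cPages count are illustrative assumptions, not taken from this file.
 *
 *     for (unsigned i = 0; i < cPages; i++)
 *         pgmPoolFlushPage(pPool, apPages[i], false);    // fFlush=false: defer the TLB flush
 *     PGM_INVL_ALL_VCPU_TLBS(pVM);                       // one flush instead of cPages flushes
 */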
5000
5001
5002/**
5003 * Frees a usage of a pool page.
5004 *
5005 * The caller is responsible for updating the user table so that it no longer
5006 * references the shadow page.
5007 *
5008 * @param pPool The pool.
5009 * @param pPage The shadow page.
5010 * @param iUser The shadow page pool index of the user table.
5011 * NIL_PGMPOOL_IDX for root pages.
5012 * @param iUserTable The index into the user table (shadowed). Ignored if
5013 * root page.
5014 */
5015void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5016{
5017 PVM pVM = pPool->CTX_SUFF(pVM);
5018
5019 STAM_PROFILE_START(&pPool->StatFree, a);
5020 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5021 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5022 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5023
5024 pgmLock(pVM);
5025 if (iUser != NIL_PGMPOOL_IDX)
5026 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5027 if (!pPage->fCached)
5028 pgmPoolFlushPage(pPool, pPage);
5029 pgmUnlock(pVM);
5030 STAM_PROFILE_STOP(&pPool->StatFree, a);
5031}
5032
5033
5034/**
5035 * Makes one or more free pages available.
5036 *
5037 * @returns VBox status code.
5038 * @retval VINF_SUCCESS on success.
5039 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5040 *
5041 * @param pPool The pool.
5042 * @param enmKind Page table kind
5043 * @param iUser The user of the page.
5044 */
5045static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5046{
5047 PVM pVM = pPool->CTX_SUFF(pVM);
5048 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5049 NOREF(enmKind);
5050
5051 /*
5052 * If the pool isn't fully grown yet, expand it.
5053 */
5054 if ( pPool->cCurPages < pPool->cMaxPages
5055#if defined(IN_RC)
5056 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
5057 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5058 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
5059#endif
5060 )
5061 {
5062 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5063#ifdef IN_RING3
5064 int rc = PGMR3PoolGrow(pVM);
5065#else
5066 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
5067#endif
5068 if (RT_FAILURE(rc))
5069 return rc;
5070 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5071 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5072 return VINF_SUCCESS;
5073 }
5074
5075 /*
5076 * Free one cached page.
5077 */
5078 return pgmPoolCacheFreeOne(pPool, iUser);
5079}
5080
5081
5082/**
5083 * Allocates a page from the pool.
5084 *
5085 * This page may actually be a cached page and not in need of any processing
5086 * on the caller's part.
5087 *
5088 * @returns VBox status code.
5089 * @retval VINF_SUCCESS if a NEW page was allocated.
5090 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5091 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5092 *
5093 * @param pVM Pointer to the VM.
5094 * @param GCPhys The GC physical address of the page we're going to shadow.
5095 * For 4MB and 2MB PD entries, it's the first address the
5096 * shadow PT is covering.
5097 * @param enmKind The kind of mapping.
5098 * @param enmAccess Access type for the mapping (only relevant for big pages)
5099 * @param fA20Enabled Whether the A20 gate is enabled or not.
5100 * @param iUser The shadow page pool index of the user table. Root
5101 * pages should pass NIL_PGMPOOL_IDX.
5102 * @param iUserTable The index into the user table (shadowed). Ignored for
5103 * root pages (iUser == NIL_PGMPOOL_IDX).
5104 * @param fLockPage Lock the page
5105 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5106 */
5107int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5108 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5109{
5110 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5111 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5112 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5113 *ppPage = NULL;
5114 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5115 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5116 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5117
5118 pgmLock(pVM);
5119
5120 if (pPool->fCacheEnabled)
5121 {
5122 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5123 if (RT_SUCCESS(rc2))
5124 {
5125 if (fLockPage)
5126 pgmPoolLockPage(pPool, *ppPage);
5127 pgmUnlock(pVM);
5128 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5129 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5130 return rc2;
5131 }
5132 }
5133
5134 /*
5135 * Allocate a new one.
5136 */
5137 int rc = VINF_SUCCESS;
5138 uint16_t iNew = pPool->iFreeHead;
5139 if (iNew == NIL_PGMPOOL_IDX)
5140 {
5141 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5142 if (RT_FAILURE(rc))
5143 {
5144 pgmUnlock(pVM);
5145 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5146 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5147 return rc;
5148 }
5149 iNew = pPool->iFreeHead;
5150 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5151 }
5152
5153 /* unlink the free head */
5154 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5155 pPool->iFreeHead = pPage->iNext;
5156 pPage->iNext = NIL_PGMPOOL_IDX;
5157
5158 /*
5159 * Initialize it.
5160 */
5161 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5162 pPage->enmKind = enmKind;
5163 pPage->enmAccess = enmAccess;
5164 pPage->GCPhys = GCPhys;
5165 pPage->fA20Enabled = fA20Enabled;
5166 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5167 pPage->fMonitored = false;
5168 pPage->fCached = false;
5169 pPage->fDirty = false;
5170 pPage->fReusedFlushPending = false;
5171 pPage->cModifications = 0;
5172 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5173 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5174 pPage->cPresent = 0;
5175 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5176 pPage->idxDirtyEntry = 0;
5177 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5178 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5179 pPage->cLastAccessHandler = 0;
5180 pPage->cLocked = 0;
5181# ifdef VBOX_STRICT
5182 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5183# endif
5184
5185 /*
5186 * Insert into the tracking and cache. If this fails, free the page.
5187 */
5188 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5189 if (RT_FAILURE(rc3))
5190 {
5191 pPool->cUsedPages--;
5192 pPage->enmKind = PGMPOOLKIND_FREE;
5193 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5194 pPage->GCPhys = NIL_RTGCPHYS;
5195 pPage->iNext = pPool->iFreeHead;
5196 pPool->iFreeHead = pPage->idx;
5197 pgmUnlock(pVM);
5198 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5199 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5200 return rc3;
5201 }
5202
5203 /*
5204 * Commit the allocation, clear the page and return.
5205 */
5206#ifdef VBOX_WITH_STATISTICS
5207 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5208 pPool->cUsedPagesHigh = pPool->cUsedPages;
5209#endif
5210
5211 if (!pPage->fZeroed)
5212 {
5213 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5214 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5215 ASMMemZeroPage(pv);
5216 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5217 }
5218
5219 *ppPage = pPage;
5220 if (fLockPage)
5221 pgmPoolLockPage(pPool, pPage);
5222 pgmUnlock(pVM);
5223 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5224 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5225 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5226 return rc;
5227}
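
/*
 * Usage sketch: a minimal, hypothetical example of allocating a shadow page
 * table for a physical-only EPT mapping and releasing it again.  pShwPde (the
 * parent shadow PD pool page) and iPde (the PDE index being shadowed) are
 * assumptions made for illustration; both VINF_SUCCESS and
 * VINF_PGM_CACHED_PAGE indicate a usable page.
 *
 *     PPGMPOOLPAGE pShwPage = NULL;
 *     int rc = pgmPoolAlloc(pVM, GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, PGMPOOLKIND_EPT_PT_FOR_PHYS,
 *                           PGMPOOLACCESS_DONTCARE, true,              // fA20Enabled
 *                           pShwPde->idx, iPde, false, &pShwPage);     // fLockPage=false
 *     if (RT_SUCCESS(rc))
 *     {
 *         // ... enter pShwPage->Core.Key (the host physical address) into the parent PDE ...
 *         pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPde);      // when the PDE is torn down
 *     }
 */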
5228
5229
5230/**
5231 * Frees a usage of a pool page.
5232 *
5233 * @param pVM Pointer to the VM.
5234 * @param HCPhys The HC physical address of the shadow page.
5235 * @param iUser The shadow page pool index of the user table.
5236 * NIL_PGMPOOL_IDX if root page.
5237 * @param iUserTable The index into the user table (shadowed). Ignored if
5238 * root page.
5239 */
5240void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5241{
5242 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5243 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5244 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5245}
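
/*
 * Usage sketch: pgmPoolFree() is the variant to use when only the host
 * physical address stored in a shadow table entry is at hand.  A hypothetical
 * caller replacing a present shadow PDE might free the old page table like
 * this (PdeDst, pShwPde and iPdDst are illustrative assumptions):
 *
 *     if (PdeDst.n.u1Present)
 *         pgmPoolFree(pVM, PdeDst.u & X86_PDE_PAE_PG_MASK, pShwPde->idx, iPdDst);
 */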
5246
5247
5248/**
5249 * Internal worker for finding an 'in-use' shadow page given its physical address.
5250 *
5251 * @returns Pointer to the shadow page structure.
5252 * @param pPool The pool.
5253 * @param HCPhys The HC physical address of the shadow page.
5254 */
5255PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5256{
5257 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5258
5259 /*
5260 * Look up the page.
5261 */
5262 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5263
5264 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5265 return pPage;
5266}
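
/*
 * Usage sketch: a hypothetical lookup of the pool page backing a present
 * shadow PDE, masking the attribute bits off the entry first (pgmPoolGetPage()
 * also strips them internally via X86_PTE_PAE_PG_MASK).  The PGM lock must be
 * held; PdeDst is an illustrative assumption.
 *
 *     PGM_LOCK_ASSERT_OWNER(pVM);
 *     PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & X86_PDE_PAE_PG_MASK);
 *     // pShwPage->GCPhys is the guest address covered by this shadow page table.
 */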
5267
5268
5269/**
5270 * Internal worker for finding a page for debugging purposes, no assertions.
5271 *
5272 * @returns Pointer to the shadow page structure, NULL if not found.
5273 * @param pPool The pool.
5274 * @param HCPhys The HC physical address of the shadow page.
5275 */
5276PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5277{
5278 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5279 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5280}
5281
5282#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5283
5284/**
5285 * Flush the specified page if present
5286 *
5287 * @param pVM Pointer to the VM.
5288 * @param GCPhys Guest physical address of the page to flush
5289 */
5290void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5291{
5292 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5293
5294 VM_ASSERT_EMT(pVM);
5295
5296 /*
5297 * Look up the GCPhys in the hash.
5298 */
5299 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5300 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5301 if (i == NIL_PGMPOOL_IDX)
5302 return;
5303
5304 do
5305 {
5306 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5307 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5308 {
5309 switch (pPage->enmKind)
5310 {
5311 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5312 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5313 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5314 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5315 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5316 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5317 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5318 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5319 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5320 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5321 case PGMPOOLKIND_64BIT_PML4:
5322 case PGMPOOLKIND_32BIT_PD:
5323 case PGMPOOLKIND_PAE_PDPT:
5324 {
5325 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5326#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5327 if (pPage->fDirty)
5328 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5329 else
5330#endif
5331 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5332 Assert(!pgmPoolIsPageLocked(pPage));
5333 pgmPoolMonitorChainFlush(pPool, pPage);
5334 return;
5335 }
5336
5337 /* ignore, no monitoring. */
5338 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5339 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5340 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5341 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5342 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5343 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5344 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5345 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5346 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5347 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5348 case PGMPOOLKIND_ROOT_NESTED:
5349 case PGMPOOLKIND_PAE_PD_PHYS:
5350 case PGMPOOLKIND_PAE_PDPT_PHYS:
5351 case PGMPOOLKIND_32BIT_PD_PHYS:
5352 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5353 break;
5354
5355 default:
5356 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5357 }
5358 }
5359
5360 /* next */
5361 i = pPage->iNext;
5362 } while (i != NIL_PGMPOOL_IDX);
5363 return;
5364}
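
/*
 * Usage sketch: a hypothetical ring-3 caller that is about to hand a guest
 * page back to the guest for another purpose and wants any monitored shadow
 * copies of it dropped first.  GCPhysPT is an illustrative name for the guest
 * physical address of the former page table.
 *
 *     pgmPoolFlushPageByGCPhys(pVM, GCPhysPT);   // flushes the monitored chain, if any
 */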
5365
5366#endif /* IN_RING3 */
5367#ifdef IN_RING3
5368
5369/**
5370 * Reset CPU on hot plugging.
5371 *
5372 * @param pVM Pointer to the VM.
5373 * @param pVCpu The virtual CPU.
5374 */
5375void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5376{
5377 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5378
5379 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5380 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5381 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5382}
5383
5384
5385/**
5386 * Flushes the entire cache.
5387 *
5388 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5389 * this and will execute the CR3 flush.
5390 *
5391 * @param pVM Pointer to the VM.
5392 */
5393void pgmR3PoolReset(PVM pVM)
5394{
5395 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5396
5397 PGM_LOCK_ASSERT_OWNER(pVM);
5398 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5399 LogFlow(("pgmR3PoolReset:\n"));
5400
5401 /*
5402 * If there are no pages in the pool, there is nothing to do.
5403 */
5404 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5405 {
5406 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5407 return;
5408 }
5409
5410 /*
5411 * Exit the shadow mode since we're going to clear everything,
5412 * including the root page.
5413 */
5414 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5415 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5416
5417 /*
5418 * Nuke the free list and reinsert all pages into it.
5419 */
5420 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5421 {
5422 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5423
5424 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5425 if (pPage->fMonitored)
5426 pgmPoolMonitorFlush(pPool, pPage);
5427 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5428 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5429 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5430 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5431 pPage->cModifications = 0;
5432 pPage->GCPhys = NIL_RTGCPHYS;
5433 pPage->enmKind = PGMPOOLKIND_FREE;
5434 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5435 Assert(pPage->idx == i);
5436 pPage->iNext = i + 1;
5437 pPage->fA20Enabled = true;
5438 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5439 pPage->fSeenNonGlobal = false;
5440 pPage->fMonitored = false;
5441 pPage->fDirty = false;
5442 pPage->fCached = false;
5443 pPage->fReusedFlushPending = false;
5444 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5445 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5446 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5447 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5448 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5449 pPage->cLastAccessHandler = 0;
5450 pPage->cLocked = 0;
5451#ifdef VBOX_STRICT
5452 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5453#endif
5454 }
5455 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5456 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5457 pPool->cUsedPages = 0;
5458
5459 /*
5460 * Zap and reinitialize the user records.
5461 */
5462 pPool->cPresent = 0;
5463 pPool->iUserFreeHead = 0;
5464 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5465 const unsigned cMaxUsers = pPool->cMaxUsers;
5466 for (unsigned i = 0; i < cMaxUsers; i++)
5467 {
5468 paUsers[i].iNext = i + 1;
5469 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5470 paUsers[i].iUserTable = 0xfffffffe;
5471 }
5472 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5473
5474 /*
5475 * Clear all the GCPhys links and rebuild the phys ext free list.
5476 */
5477 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5478 pRam;
5479 pRam = pRam->CTX_SUFF(pNext))
5480 {
5481 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5482 while (iPage-- > 0)
5483 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5484 }
5485
5486 pPool->iPhysExtFreeHead = 0;
5487 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5488 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5489 for (unsigned i = 0; i < cMaxPhysExts; i++)
5490 {
5491 paPhysExts[i].iNext = i + 1;
5492 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5493 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5494 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5495 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5496 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5497 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5498 }
5499 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5500
5501 /*
5502 * Just zap the modified list.
5503 */
5504 pPool->cModifiedPages = 0;
5505 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5506
5507 /*
5508 * Clear the GCPhys hash and the age list.
5509 */
5510 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5511 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5512 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5513 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5514
5515#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5516 /* Clear all dirty pages. */
5517 pPool->idxFreeDirtyPage = 0;
5518 pPool->cDirtyPages = 0;
5519 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5520 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5521#endif
5522
5523 /*
5524 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5525 */
5526 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5527 {
5528 /*
5529 * Re-enter the shadowing mode and assert Sync CR3 FF.
5530 */
5531 PVMCPU pVCpu = &pVM->aCpus[i];
5532 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5533 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5534 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5535 }
5536
5537 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5538}
5539
5540#endif /* IN_RING3 */
5541
5542#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5543/**
5544 * Stringifies a PGMPOOLKIND value.
5545 */
5546static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5547{
5548 switch ((PGMPOOLKIND)enmKind)
5549 {
5550 case PGMPOOLKIND_INVALID:
5551 return "PGMPOOLKIND_INVALID";
5552 case PGMPOOLKIND_FREE:
5553 return "PGMPOOLKIND_FREE";
5554 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5555 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5556 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5557 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5558 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5559 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5560 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5561 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5562 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5563 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5564 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5565 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5566 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5567 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5568 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5569 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5570 case PGMPOOLKIND_32BIT_PD:
5571 return "PGMPOOLKIND_32BIT_PD";
5572 case PGMPOOLKIND_32BIT_PD_PHYS:
5573 return "PGMPOOLKIND_32BIT_PD_PHYS";
5574 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5575 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5576 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5577 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5578 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5579 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5580 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5581 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5582 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5583 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5584 case PGMPOOLKIND_PAE_PD_PHYS:
5585 return "PGMPOOLKIND_PAE_PD_PHYS";
5586 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5587 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5588 case PGMPOOLKIND_PAE_PDPT:
5589 return "PGMPOOLKIND_PAE_PDPT";
5590 case PGMPOOLKIND_PAE_PDPT_PHYS:
5591 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5592 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5593 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5594 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5595 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5596 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5597 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5598 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5599 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5600 case PGMPOOLKIND_64BIT_PML4:
5601 return "PGMPOOLKIND_64BIT_PML4";
5602 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5603 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5604 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5605 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5606 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5607 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5608 case PGMPOOLKIND_ROOT_NESTED:
5609 return "PGMPOOLKIND_ROOT_NESTED";
5610 }
5611 return "Unknown kind!";
5612}
5613#endif /* LOG_ENABLED || VBOX_STRICT */
5614