VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@27088

Last change on this file since 27088 was 27065, checked in by vboxsync, 15 years ago

Fixed and cleaned up reference counting mess.
Additional code for cleaning up big pages.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 196.0 KB
1/* $Id: PGMAllPool.cpp 27065 2010-03-05 10:07:55Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "../PGMInternal.h"
35#include <VBox/vm.h>
36#include "../PGMInline.h"
37#include <VBox/disopcode.h>
38#include <VBox/hwacc_vmx.h>
39
40#include <VBox/log.h>
41#include <VBox/err.h>
42#include <iprt/asm.h>
43#include <iprt/string.h>
44
45
46/*******************************************************************************
47* Internal Functions *
48*******************************************************************************/
49RT_C_DECLS_BEGIN
50static void pgmPoolFlushAllInt(PPGMPOOL pPool);
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
55static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
56#ifndef IN_RING3
57DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
58#endif
59#ifdef LOG_ENABLED
60static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
61#endif
62#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
63static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT);
64#endif
65
66int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
67PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
68void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
69void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
70static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
71
72RT_C_DECLS_END
73
74
75/**
76 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
77 *
78 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
79 * @param enmKind The page kind.
80 */
81DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
82{
83 switch (enmKind)
84 {
85 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
86 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
87 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
88 return true;
89 default:
90 return false;
91 }
92}
93
94/** @def PGMPOOL_PAGE_2_LOCKED_PTR
95 * Maps a pool page into the current context and locks it (RC only).
96 *
97 * @returns Pointer to the mapped page.
98 * @param pVM The VM handle.
99 * @param pPage The pool page.
100 *
101 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume one of the
102 * small page window entries employed by that function. Be careful.
103 * @remark There is no need to assert on the result.
104 */
105#if defined(IN_RC)
106DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
107{
108 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
109
110 /* Make sure the dynamic mapping will not be reused. */
111 if (pv)
112 PGMDynLockHCPage(pVM, (uint8_t *)pv);
113
114 return pv;
115}
116#else
117# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
118#endif
119
120/** @def PGMPOOL_UNLOCK_PTR
121 * Unlocks a previously locked dynamic mapping (RC only).
122 *
123 * @param pVM The VM handle.
124 * @param pPage The pool page mapping to unlock.
125 *
126 * @remark In RC this releases the dynamic mapping lock taken by
127 * PGMPOOL_PAGE_2_LOCKED_PTR() via PGMDynUnlockHCPage(); in other
128 * contexts it is a no-op.
129 * @remark There is no need to assert on the result.
130 */
131#if defined(IN_RC)
132DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
133{
134 if (pvPage)
135 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
136}
137#else
138# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
139#endif
140
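/*
 * Usage sketch for the two helpers above (illustrative only; the function name
 * below is made up, the real callers are the monitor and track routines later
 * in this file): map the pool page, use the mapping, release the RC lock.
 */
#if 0 /* example only */
DECLINLINE(uint64_t) pgmPoolExampleReadShwPaePte(PVM pVM, PPGMPOOLPAGE pPage, unsigned iEntry)
{
    PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage); /* map + lock (RC only). */
    uint64_t  u   = pPT ? pPT->a[iEntry].u : 0;                       /* use the mapping. */
    PGMPOOL_UNLOCK_PTR(pVM, pPT);                                      /* no-op outside RC. */
    return u;
}
#endif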
141
142/**
143 * Flushes a chain of pages sharing the same access monitor.
144 *
145 * @returns VBox status code suitable for scheduling.
146 * @param pPool The pool.
147 * @param pPage A page in the chain.
148 */
149int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
150{
151 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
152
153 /*
154 * Find the list head.
155 */
156 uint16_t idx = pPage->idx;
157 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
158 {
159 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
160 {
161 idx = pPage->iMonitoredPrev;
162 Assert(idx != pPage->idx);
163 pPage = &pPool->aPages[idx];
164 }
165 }
166
167 /*
168 * Iterate the list flushing each shadow page.
169 */
170 int rc = VINF_SUCCESS;
171 for (;;)
172 {
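 /* Grab the next index before flushing; flushing a page also unlinks it from the monitor chain. */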
173 idx = pPage->iMonitoredNext;
174 Assert(idx != pPage->idx);
175 if (pPage->idx >= PGMPOOL_IDX_FIRST)
176 {
177 int rc2 = pgmPoolFlushPage(pPool, pPage);
178 AssertRC(rc2);
179 }
180 /* next */
181 if (idx == NIL_PGMPOOL_IDX)
182 break;
183 pPage = &pPool->aPages[idx];
184 }
185 return rc;
186}
187
188
189/**
190 * Wrapper for reading the guest entry that is being modified, using the current context.
191 *
192 * @returns VBox status code suitable for scheduling.
193 * @param pVM VM Handle.
194 * @param pvDst Destination address.
195 * @param pvSrc Source guest virtual address.
196 * @param GCPhysSrc The source guest physical address.
197 * @param cb Size of data to read.
198 */
199DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
200{
201#if defined(IN_RING3)
202 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
203 return VINF_SUCCESS;
204#else
205 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
206 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
207#endif
208}
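/*
 * Worked example of the masking above: for an 8 byte PAE entry (cb = 8) a
 * source address of 0x00123457 is rounded down to 0x00123450, so the read
 * covers the whole naturally aligned entry containing the given address.
 */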
209
210/**
211 * Process shadow entries before they are changed by the guest.
212 *
213 * For PT entries we will clear them. For PD entries, we'll simply check
214 * for mapping conflicts and set the SyncCR3 FF if found.
215 *
216 * @param pVCpu VMCPU handle.
217 * @param pPool The pool.
218 * @param pPage The head page.
219 * @param GCPhysFault The guest physical fault address.
220 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
221 * In R3 this is the host context 'fault' address.
222 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries.
223 */
224void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
225{
226 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
227 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
228 PVM pVM = pPool->CTX_SUFF(pVM);
229
230 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
231
232 for (;;)
233 {
234 union
235 {
236 void *pv;
237 PX86PT pPT;
238 PX86PTPAE pPTPae;
239 PX86PD pPD;
240 PX86PDPAE pPDPae;
241 PX86PDPT pPDPT;
242 PX86PML4 pPML4;
243 } uShw;
244
245 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
246
247 uShw.pv = NULL;
248 switch (pPage->enmKind)
249 {
250 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
251 {
252 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
253 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
254 const unsigned iShw = off / sizeof(X86PTE);
255 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
256 if (uShw.pPT->a[iShw].n.u1Present)
257 {
258 X86PTE GstPte;
259
260 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
261 AssertRC(rc);
262 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
263 pgmPoolTracDerefGCPhysHint(pPool, pPage,
264 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
265 GstPte.u & X86_PTE_PG_MASK);
266 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
267 }
268 break;
269 }
270
271 /* page/2 sized */
272 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
273 {
274 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
275 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
276 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
277 {
278 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
279 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
280 if (uShw.pPTPae->a[iShw].n.u1Present)
281 {
282 X86PTE GstPte;
283 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
284 AssertRC(rc);
285
286 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
287 pgmPoolTracDerefGCPhysHint(pPool, pPage,
288 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
289 GstPte.u & X86_PTE_PG_MASK);
290 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
291 }
292 }
293 break;
294 }
295
296 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
297 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
298 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
299 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
300 {
301 unsigned iGst = off / sizeof(X86PDE);
302 unsigned iShwPdpt = iGst / 256;
303 unsigned iShw = (iGst % 256) * 2;
304 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
305
306 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
307 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
308 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
309 {
310 for (unsigned i = 0; i < 2; i++)
311 {
312# ifndef IN_RING0
313 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
314 {
315 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
316 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
317 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
318 break;
319 }
320 else
321# endif /* !IN_RING0 */
322 if (uShw.pPDPae->a[iShw+i].n.u1Present)
323 {
324 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
325 pgmPoolFree(pVM,
326 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
327 pPage->idx,
328 iShw + i);
329 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
330 }
331
332 /* paranoia / a bit assumptive. */
333 if ( (off & 3)
334 && (off & 3) + cbWrite > 4)
335 {
336 const unsigned iShw2 = iShw + 2 + i;
337 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
338 {
339# ifndef IN_RING0
340 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
341 {
342 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
343 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
344 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
345 break;
346 }
347 else
348# endif /* !IN_RING0 */
349 if (uShw.pPDPae->a[iShw2].n.u1Present)
350 {
351 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
352 pgmPoolFree(pVM,
353 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
354 pPage->idx,
355 iShw2);
356 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
357 }
358 }
359 }
360 }
361 }
362 break;
363 }
364
365 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
366 {
367 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
368 const unsigned iShw = off / sizeof(X86PTEPAE);
369 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
370 if (uShw.pPTPae->a[iShw].n.u1Present)
371 {
372 X86PTEPAE GstPte;
373 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
374 AssertRC(rc);
375
376 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
377 pgmPoolTracDerefGCPhysHint(pPool, pPage,
378 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
379 GstPte.u & X86_PTE_PAE_PG_MASK);
380 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
381 }
382
383 /* paranoia / a bit assumptive. */
384 if ( (off & 7)
385 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
386 {
387 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
388 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
389
390 if (uShw.pPTPae->a[iShw2].n.u1Present)
391 {
392 X86PTEPAE GstPte;
393# ifdef IN_RING3
394 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
395# else
396 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
397# endif
398 AssertRC(rc);
399 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
400 pgmPoolTracDerefGCPhysHint(pPool, pPage,
401 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
402 GstPte.u & X86_PTE_PAE_PG_MASK);
403 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u, 0);
404 }
405 }
406 break;
407 }
408
409 case PGMPOOLKIND_32BIT_PD:
410 {
411 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
412 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
413
414 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
415 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
416# ifndef IN_RING0
417 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
418 {
419 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
420 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
421 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
422 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
423 break;
424 }
425# endif /* !IN_RING0 */
426# ifndef IN_RING0
427 else
428# endif /* !IN_RING0 */
429 {
430 if (uShw.pPD->a[iShw].n.u1Present)
431 {
432 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
433 pgmPoolFree(pVM,
434 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
435 pPage->idx,
436 iShw);
437 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
438 }
439 }
440 /* paranoia / a bit assumptive. */
441 if ( (off & 3)
442 && (off & 3) + cbWrite > sizeof(X86PTE))
443 {
444 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
445 if ( iShw2 != iShw
446 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
447 {
448# ifndef IN_RING0
449 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
450 {
451 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
452 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
453 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
454 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
455 break;
456 }
457# endif /* !IN_RING0 */
458# ifndef IN_RING0
459 else
460# endif /* !IN_RING0 */
461 {
462 if (uShw.pPD->a[iShw2].n.u1Present)
463 {
464 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
465 pgmPoolFree(pVM,
466 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
467 pPage->idx,
468 iShw2);
469 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
470 }
471 }
472 }
473 }
474#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
475 if ( uShw.pPD->a[iShw].n.u1Present
476 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
477 {
478 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
479# ifdef IN_RC /* TLB load - we're pushing things a bit... */
480 ASMProbeReadByte(pvAddress);
481# endif
482 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
483 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
484 }
485#endif
486 break;
487 }
488
489 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
490 {
491 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
492 const unsigned iShw = off / sizeof(X86PDEPAE);
493 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
494#ifndef IN_RING0
495 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
496 {
497 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
498 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
499 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
500 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
501 break;
502 }
503#endif /* !IN_RING0 */
504 /*
505 * Causes trouble when the guest uses a PDE to refer to the whole page table level
506 * structure. (Invalidate here; faults later on when it tries to change the page
507 * table entries -> recheck; probably only applies to the RC case.)
508 */
509# ifndef IN_RING0
510 else
511# endif /* !IN_RING0 */
512 {
513 if (uShw.pPDPae->a[iShw].n.u1Present)
514 {
515 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
516 pgmPoolFree(pVM,
517 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
518 pPage->idx,
519 iShw);
520 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
521 }
522 }
523 /* paranoia / a bit assumptive. */
524 if ( (off & 7)
525 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
526 {
527 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
528 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
529
530#ifndef IN_RING0
531 if ( iShw2 != iShw
532 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
533 {
534 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
535 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
536 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
537 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
538 break;
539 }
540#endif /* !IN_RING0 */
541# ifndef IN_RING0
542 else
543# endif /* !IN_RING0 */
544 if (uShw.pPDPae->a[iShw2].n.u1Present)
545 {
546 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
547 pgmPoolFree(pVM,
548 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
549 pPage->idx,
550 iShw2);
551 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
552 }
553 }
554 break;
555 }
556
557 case PGMPOOLKIND_PAE_PDPT:
558 {
559 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
560 /*
561 * Hopefully this doesn't happen very often:
562 * - touching unused parts of the page
563 * - messing with the bits of pd pointers without changing the physical address
564 */
565 /* PDPT roots are not page aligned; 32 byte only! */
566 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
567
568 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
569 const unsigned iShw = offPdpt / sizeof(X86PDPE);
570 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
571 {
572# ifndef IN_RING0
573 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
574 {
575 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
576 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
577 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
578 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
579 break;
580 }
581# endif /* !IN_RING0 */
582# ifndef IN_RING0
583 else
584# endif /* !IN_RING0 */
585 if (uShw.pPDPT->a[iShw].n.u1Present)
586 {
587 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
588 pgmPoolFree(pVM,
589 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
590 pPage->idx,
591 iShw);
592 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
593 }
594
595 /* paranoia / a bit assumptive. */
596 if ( (offPdpt & 7)
597 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
598 {
599 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
600 if ( iShw2 != iShw
601 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
602 {
603# ifndef IN_RING0
604 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
605 {
606 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
607 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
608 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
609 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
610 break;
611 }
612# endif /* !IN_RING0 */
613# ifndef IN_RING0
614 else
615# endif /* !IN_RING0 */
616 if (uShw.pPDPT->a[iShw2].n.u1Present)
617 {
618 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
619 pgmPoolFree(pVM,
620 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
621 pPage->idx,
622 iShw2);
623 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
624 }
625 }
626 }
627 }
628 break;
629 }
630
631#ifndef IN_RC
632 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
633 {
634 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
635 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
636 const unsigned iShw = off / sizeof(X86PDEPAE);
637 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
638 if (uShw.pPDPae->a[iShw].n.u1Present)
639 {
640 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
641 pgmPoolFree(pVM,
642 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
643 pPage->idx,
644 iShw);
645 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
646 }
647 /* paranoia / a bit assumptive. */
648 if ( (off & 7)
649 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
650 {
651 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
652 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
653
654 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
655 if (uShw.pPDPae->a[iShw2].n.u1Present)
656 {
657 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
658 pgmPoolFree(pVM,
659 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
660 pPage->idx,
661 iShw2);
662 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
663 }
664 }
665 break;
666 }
667
668 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
669 {
670 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
671 /*
672 * Hopefully this doesn't happen very often:
673 * - messing with the bits of pd pointers without changing the physical address
674 */
675 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
676 const unsigned iShw = off / sizeof(X86PDPE);
677 if (uShw.pPDPT->a[iShw].n.u1Present)
678 {
679 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
680 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
681 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
682 }
683 /* paranoia / a bit assumptive. */
684 if ( (off & 7)
685 && (off & 7) + cbWrite > sizeof(X86PDPE))
686 {
687 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
688 if (uShw.pPDPT->a[iShw2].n.u1Present)
689 {
690 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
691 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
692 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
693 }
694 }
695 break;
696 }
697
698 case PGMPOOLKIND_64BIT_PML4:
699 {
700 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
701 /*
702 * Hopefully this doesn't happen very often:
703 * - messing with the bits of pd pointers without changing the physical address
704 */
705 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
706 const unsigned iShw = off / sizeof(X86PDPE);
707 if (uShw.pPML4->a[iShw].n.u1Present)
708 {
709 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
710 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
711 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
712 }
713 /* paranoia / a bit assumptive. */
714 if ( (off & 7)
715 && (off & 7) + cbWrite > sizeof(X86PDPE))
716 {
717 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
718 if (uShw.pPML4->a[iShw2].n.u1Present)
719 {
720 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
721 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
722 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
723 }
724 }
725 break;
726 }
727#endif /* !IN_RC */
728
729 default:
730 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
731 }
732 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
733
734 /* next */
735 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
736 return;
737 pPage = &pPool->aPages[pPage->iMonitoredNext];
738 }
739}
740
741# ifndef IN_RING3
742/**
743 * Checks if an access could be a fork operation in progress.
744 *
745 * Meaning that the guest is setting up the parent process for Copy-On-Write.
746 *
747 * @returns true if it's likely that we're forking, otherwise false.
748 * @param pPool The pool.
749 * @param pDis The disassembled instruction.
750 * @param offFault The access offset.
751 */
752DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
753{
754 /*
755 * i386 linux is using btr to clear X86_PTE_RW.
756 * The functions involved are (2.6.16 source inspection):
757 * clear_bit
758 * ptep_set_wrprotect
759 * copy_one_pte
760 * copy_pte_range
761 * copy_pmd_range
762 * copy_pud_range
763 * copy_page_range
764 * dup_mmap
765 * dup_mm
766 * copy_mm
767 * copy_process
768 * do_fork
769 */
770 if ( pDis->pCurInstr->opcode == OP_BTR
771 && !(offFault & 4)
772 /** @todo Validate that the bit index is X86_PTE_RW. */
773 )
774 {
775 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
776 return true;
777 }
778 return false;
779}
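/*
 * Illustration of the heuristic above (a sketch, not taken from a real trace):
 * Linux write protects a PTE with something like "btr [pte], 1", bit 1 being
 * X86_PTE_RW. That bit lives in the low dword of a PAE entry, so such an
 * access has (offFault & 4) == 0 and disassembles to OP_BTR, which is enough
 * for the likely-fork classification; the bit index itself is not validated
 * yet (see the @todo above).
 */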
780
781
782/**
783 * Determine whether the page is likely to have been reused.
784 *
785 * @returns true if we consider the page as being reused for a different purpose.
786 * @returns false if we consider it to still be a paging page.
787 * @param pVM VM Handle.
788 * @param pVCpu VMCPU Handle.
789 * @param pRegFrame Trap register frame.
790 * @param pDis The disassembly info for the faulting instruction.
791 * @param pvFault The fault address.
792 *
793 * @remark The REP prefix check is left to the caller because of STOSD/W.
794 */
795DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
796{
797#ifndef IN_RC
798 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
799 if ( HWACCMHasPendingIrq(pVM)
800 && (pRegFrame->rsp - pvFault) < 32)
801 {
802 /* Fault caused by stack writes while trying to inject an interrupt event. */
803 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
804 return true;
805 }
806#else
807 NOREF(pVM); NOREF(pvFault);
808#endif
809
810 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
811
812 /* Non-supervisor mode write means it's used for something else. */
813 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
814 return true;
815
816 switch (pDis->pCurInstr->opcode)
817 {
818 /* call implies the actual push of the return address faulted */
819 case OP_CALL:
820 Log4(("pgmPoolMonitorIsReused: CALL\n"));
821 return true;
822 case OP_PUSH:
823 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
824 return true;
825 case OP_PUSHF:
826 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
827 return true;
828 case OP_PUSHA:
829 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
830 return true;
831 case OP_FXSAVE:
832 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
833 return true;
834 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
835 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
836 return true;
837 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
838 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
839 return true;
840 case OP_MOVSWD:
841 case OP_STOSWD:
842 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
843 && pRegFrame->rcx >= 0x40
844 )
845 {
846 Assert(pDis->mode == CPUMODE_64BIT);
847
848 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
849 return true;
850 }
851 return false;
852 }
853 if ( ( (pDis->param1.flags & USE_REG_GEN32)
854 || (pDis->param1.flags & USE_REG_GEN64))
855 && (pDis->param1.base.reg_gen == USE_REG_ESP))
856 {
857 Log4(("pgmPoolMonitorIsReused: ESP\n"));
858 return true;
859 }
860
861 return false;
862}
863
864/**
865 * Flushes the page being accessed.
866 *
867 * @returns VBox status code suitable for scheduling.
868 * @param pVM The VM handle.
869 * @param pVCpu The VMCPU handle.
870 * @param pPool The pool.
871 * @param pPage The pool page (head).
872 * @param pDis The disassembly of the write instruction.
873 * @param pRegFrame The trap register frame.
874 * @param GCPhysFault The fault address as guest physical address.
875 * @param pvFault The fault address.
876 */
877static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
878 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
879{
880 /*
881 * First, do the flushing.
882 */
883 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
884
885 /*
886 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection). Must do this in raw mode (!); XP boot will fail otherwise
887 */
888 uint32_t cbWritten;
889 int rc2 = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten, EMCODETYPE_ALL);
890 if (RT_SUCCESS(rc2))
891 pRegFrame->rip += pDis->opsize;
892 else if (rc2 == VERR_EM_INTERPRETER)
893 {
894#ifdef IN_RC
895 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
896 {
897 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
898 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
899 rc = VINF_SUCCESS;
900 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
901 }
902 else
903#endif
904 {
905 rc = VINF_EM_RAW_EMULATE_INSTR;
906 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
907 }
908 }
909 else
910 rc = rc2;
911
912 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
913 return rc;
914}
915
916/**
917 * Handles the STOSD write accesses.
918 *
919 * @returns VBox status code suitable for scheduling.
920 * @param pVM The VM handle.
921 * @param pPool The pool.
922 * @param pPage The pool page (head).
923 * @param pDis The disassembly of the write instruction.
924 * @param pRegFrame The trap register frame.
925 * @param GCPhysFault The fault address as guest physical address.
926 * @param pvFault The fault address.
927 */
928DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
929 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
930{
931 unsigned uIncrement = pDis->param1.size;
932
933 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
934 Assert(pRegFrame->rcx <= 0x20);
935
936#ifdef VBOX_STRICT
937 if (pDis->opmode == CPUMODE_32BIT)
938 Assert(uIncrement == 4);
939 else
940 Assert(uIncrement == 8);
941#endif
942
943 Log3(("pgmPoolAccessHandlerSTOSD\n"));
944
945 /*
946 * Increment the modification counter and insert it into the list
947 * of modified pages the first time.
948 */
949 if (!pPage->cModifications++)
950 pgmPoolMonitorModifiedInsert(pPool, pPage);
951
952 /*
953 * Execute REP STOSD.
954 *
955 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
956 * write situation, meaning that it's safe to write here.
957 */
958 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
959 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
960 while (pRegFrame->rcx)
961 {
962#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
963 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
964 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
965 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
966#else
967 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
968#endif
969#ifdef IN_RC
970 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
971#else
972 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
973#endif
974 pu32 += uIncrement;
975 GCPhysFault += uIncrement;
976 pRegFrame->rdi += uIncrement;
977 pRegFrame->rcx--;
978 }
979 pRegFrame->rip += pDis->opsize;
980
981 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
982 return VINF_SUCCESS;
983}
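/*
 * Context note (an assumption based on the checks in the caller): the typical
 * trigger for this path is a guest "rep stosd"/"rep stosq" that fills a freshly
 * allocated page table with 0 or 0x80, at most 0x20 entries at a time. Each
 * store is first run through pgmPoolMonitorChainChanging() and then committed
 * to the guest page, so the shadow and guest copies stay in sync.
 */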
984
985
986/**
987 * Handles the simple write accesses.
988 *
989 * @returns VBox status code suitable for scheduling.
990 * @param pVM The VM handle.
991 * @param pVCpu The VMCPU handle.
992 * @param pPool The pool.
993 * @param pPage The pool page (head).
994 * @param pDis The disassembly of the write instruction.
995 * @param pRegFrame The trap register frame.
996 * @param GCPhysFault The fault address as guest physical address.
997 * @param pvFault The fault address.
998 * @param pfReused Reused state (out)
999 */
1000DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1001 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1002{
1003 Log3(("pgmPoolAccessHandlerSimple\n"));
1004 /*
1005 * Increment the modification counter and insert it into the list
1006 * of modified pages the first time.
1007 */
1008 if (!pPage->cModifications++)
1009 pgmPoolMonitorModifiedInsert(pPool, pPage);
1010
1011 /*
1012 * Clear the affected entries in all pages of the monitored chain. ASSUMES that pvFault is readable.
1013 */
1014#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1015 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1016 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1017 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1018#else
1019 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1020#endif
1021
1022 /*
1023 * Interpret the instruction.
1024 */
1025 uint32_t cb;
1026 int rc = EMInterpretInstructionCPUEx(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb, EMCODETYPE_ALL);
1027 if (RT_SUCCESS(rc))
1028 pRegFrame->rip += pDis->opsize;
1029 else if (rc == VERR_EM_INTERPRETER)
1030 {
1031 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1032 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1033 rc = VINF_EM_RAW_EMULATE_INSTR;
1034 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1035 }
1036
1037#if 0 /* experimental code */
1038 if (rc == VINF_SUCCESS)
1039 {
1040 switch (pPage->enmKind)
1041 {
1042 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1043 {
1044 X86PTEPAE GstPte;
1045 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1046 AssertRC(rc);
1047
1048 /* Check the new value written by the guest. If present and with a bogus physical address, then
1049 * it's fairly safe to assume the guest is reusing the PT.
1050 */
1051 if (GstPte.n.u1Present)
1052 {
1053 RTHCPHYS HCPhys = -1;
1054 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1055 if (rc != VINF_SUCCESS)
1056 {
1057 *pfReused = true;
1058 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1059 }
1060 }
1061 break;
1062 }
1063 }
1064 }
1065#endif
1066
1067 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1068 return rc;
1069}
1070
1071/**
1072 * \#PF Handler callback for PT write accesses.
1073 *
1074 * @returns VBox status code (appropriate for GC return).
1075 * @param pVM VM Handle.
1076 * @param uErrorCode CPU Error code.
1077 * @param pRegFrame Trap register frame.
1078 * NULL on DMA and other non CPU access.
1079 * @param pvFault The fault address (cr2).
1080 * @param GCPhysFault The GC physical address corresponding to pvFault.
1081 * @param pvUser User argument.
1082 */
1083DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1084{
1085 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1086 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1087 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1088 PVMCPU pVCpu = VMMGetCpu(pVM);
1089 unsigned cMaxModifications;
1090 bool fForcedFlush = false;
1091
1092 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1093
1094 pgmLock(pVM);
1095 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1096 {
1097 /* Pool page changed while we were waiting for the lock; ignore. */
1098 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1099 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1100 pgmUnlock(pVM);
1101 return VINF_SUCCESS;
1102 }
1103#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1104 if (pPage->fDirty)
1105 {
1106 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1107 pgmUnlock(pVM);
1108 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1109 }
1110#endif
1111
1112#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1113 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1114 {
1115 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1116 void *pvGst;
1117 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1118 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1119 }
1120#endif
1121
1122 /*
1123 * Disassemble the faulting instruction.
1124 */
1125 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1126 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1127 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1128 {
1129 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1130 pgmUnlock(pVM);
1131 return rc;
1132 }
1133
1134 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1135
1136 /*
1137 * We should ALWAYS have the list head as user parameter. This
1138 * is because we use that page to record the changes.
1139 */
1140 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1141
1142#ifdef IN_RING0
1143 /* Maximum nr of modifications depends on the page type. */
1144 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1145 cMaxModifications = 4;
1146 else
1147 cMaxModifications = 24;
1148#else
1149 cMaxModifications = 48;
1150#endif
1151
1152 /*
1153 * Incremental page table updates should weigh more heavily than random ones.
1154 * (Only applies when started from offset 0.)
1155 */
1156 pVCpu->pgm.s.cPoolAccessHandler++;
1157 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1158 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1159 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1160 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1161 {
1162 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1163 pPage->cModifications = pPage->cModifications * 2;
1164 pPage->pvLastAccessHandlerFault = pvFault;
1165 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1166 if (pPage->cModifications >= cMaxModifications)
1167 {
1168 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1169 fForcedFlush = true;
1170 }
1171 }
1172
1173 if (pPage->cModifications >= cMaxModifications)
1174 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1175
1176 /*
1177 * Check if it's worth dealing with.
1178 */
1179 bool fReused = false;
1180 bool fNotReusedNotForking = false;
1181 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1182 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1183 )
1184 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1185 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1186 {
1187 /*
1188 * Simple instructions, no REP prefix.
1189 */
1190 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1191 {
1192 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1193 if (fReused)
1194 goto flushPage;
1195
1196 /* A mov instruction to change the first page table entry will be remembered so we can detect
1197 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1198 */
1199 if ( rc == VINF_SUCCESS
1200 && pDis->pCurInstr->opcode == OP_MOV
1201 && (pvFault & PAGE_OFFSET_MASK) == 0)
1202 {
1203 pPage->pvLastAccessHandlerFault = pvFault;
1204 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1205 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1206 /* Make sure we don't kick out a page too quickly. */
1207 if (pPage->cModifications > 8)
1208 pPage->cModifications = 2;
1209 }
1210 else
1211 if (pPage->pvLastAccessHandlerFault == pvFault)
1212 {
1213 /* ignore the 2nd write to this page table entry. */
1214 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1215 }
1216 else
1217 {
1218 pPage->pvLastAccessHandlerFault = 0;
1219 pPage->pvLastAccessHandlerRip = 0;
1220 }
1221
1222 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1223 pgmUnlock(pVM);
1224 return rc;
1225 }
1226
1227 /*
1228 * Windows is frequently doing small memset() operations (netio test 4k+).
1229 * We have to deal with these or we'll kill the cache and performance.
1230 */
1231 if ( pDis->pCurInstr->opcode == OP_STOSWD
1232 && !pRegFrame->eflags.Bits.u1DF
1233 && pDis->opmode == pDis->mode
1234 && pDis->addrmode == pDis->mode)
1235 {
1236 bool fValidStosd = false;
1237
1238 if ( pDis->mode == CPUMODE_32BIT
1239 && pDis->prefix == PREFIX_REP
1240 && pRegFrame->ecx <= 0x20
1241 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1242 && !((uintptr_t)pvFault & 3)
1243 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1244 )
1245 {
1246 fValidStosd = true;
1247 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1248 }
1249 else
1250 if ( pDis->mode == CPUMODE_64BIT
1251 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1252 && pRegFrame->rcx <= 0x20
1253 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1254 && !((uintptr_t)pvFault & 7)
1255 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1256 )
1257 {
1258 fValidStosd = true;
1259 }
1260
1261 if (fValidStosd)
1262 {
1263 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1264 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1265 pgmUnlock(pVM);
1266 return rc;
1267 }
1268 }
1269
1270 /* REP prefix, don't bother. */
1271 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1272 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1273 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1274 fNotReusedNotForking = true;
1275 }
1276
1277#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1278 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1279 * leads to pgm pool trashing and an excessive number of write faults due to page monitoring.
1280 */
1281 if ( pPage->cModifications >= cMaxModifications
1282 && !fForcedFlush
1283 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1284 && ( fNotReusedNotForking
1285 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1286 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1287 )
1288 )
1289 {
1290 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1291 Assert(pPage->fDirty == false);
1292
1293 /* Flush any monitored duplicates as we will disable write protection. */
1294 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1295 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1296 {
1297 PPGMPOOLPAGE pPageHead = pPage;
1298
1299 /* Find the monitor head. */
1300 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1301 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1302
1303 while (pPageHead)
1304 {
1305 unsigned idxNext = pPageHead->iMonitoredNext;
1306
1307 if (pPageHead != pPage)
1308 {
1309 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1310 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1311 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1312 AssertRC(rc2);
1313 }
1314
1315 if (idxNext == NIL_PGMPOOL_IDX)
1316 break;
1317
1318 pPageHead = &pPool->aPages[idxNext];
1319 }
1320 }
1321
1322 /* The flushing above might fail for locked pages, so double check. */
1323 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1324 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1325 {
1326 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1327
1328 /* Temporarily allow write access to the page table again. */
1329 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1330 if (rc == VINF_SUCCESS)
1331 {
1332 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1333 AssertMsg(rc == VINF_SUCCESS
1334 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1335 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1336 || rc == VERR_PAGE_NOT_PRESENT,
1337 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1338
1339 pPage->pvDirtyFault = pvFault;
1340
1341 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1342 pgmUnlock(pVM);
1343 return rc;
1344 }
1345 }
1346 }
1347#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1348
1349 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1350flushPage:
1351 /*
1352 * Not worth it, so flush it.
1353 *
1354 * If we considered it to be reused, don't go back to ring-3
1355 * to emulate failed instructions since we usually cannot
1356 * interpret them. This may be a bit risky, in which case
1357 * the reuse detection must be fixed.
1358 */
1359 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1360 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1361 && fReused)
1362 {
1363 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1364 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1365 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1366 }
1367 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1368 pgmUnlock(pVM);
1369 return rc;
1370}
1371
1372# endif /* !IN_RING3 */
1373
1374# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1375
1376# ifdef VBOX_STRICT
1377/**
1378 * Check references to guest physical memory in a PAE / PAE page table.
1379 *
1380 * @param pPool The pool.
1381 * @param pPage The page.
1382 * @param pShwPT The shadow page table (mapping of the page).
1383 * @param pGstPT The guest page table.
1384 */
1385static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1386{
1387 unsigned cErrors = 0;
1388 int LastRc = -1; /* initialized to shut up gcc */
1389 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1390 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1391
1392#ifdef VBOX_STRICT
1393 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1394 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1395#endif
1396 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1397 {
1398 if (pShwPT->a[i].n.u1Present)
1399 {
1400 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1401 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1402 if ( rc != VINF_SUCCESS
1403 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1404 {
1405 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1406 LastPTE = i;
1407 LastRc = rc;
1408 LastHCPhys = HCPhys;
1409 cErrors++;
1410
1411 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1412 rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1413 AssertRC(rc);
1414
1415 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1416 {
1417 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1418
1419 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1420 {
1421 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1422
1423 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1424 {
1425 if ( pShwPT2->a[j].n.u1Present
1426 && pShwPT2->a[j].n.u1Write
1427 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1428 {
1429 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1430 }
1431 }
1432 }
1433 }
1434 }
1435 }
1436 }
1437 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, pShwPT->a[LastPTE].u, LastHCPhys));
1438}
1439# endif /* VBOX_STRICT */
1440
1441/**
1442 * Clear references to guest physical memory in a PAE / PAE page table.
1443 *
1444 * @returns nr of changed PTEs
1445 * @param pPool The pool.
1446 * @param pPage The page.
1447 * @param pShwPT The shadow page table (mapping of the page).
1448 * @param pGstPT The guest page table.
1449 * @param pOldGstPT The old cached guest page table.
1450 * @param fAllowRemoval Allow removal (flush) of a reused page table; bail out as soon as an invalid PTE is encountered.
1451 * @param pfFlush Flush reused page table (out)
1452 */
1453DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1454{
1455 unsigned cChanged = 0;
1456
1457#ifdef VBOX_STRICT
1458 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1459 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1460#endif
1461 *pfFlush = false;
1462
1463 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1464 {
1465 /* Check the new value written by the guest. If present and with a bogus physical address, then
1466 * it's fairly safe to assume the guest is reusing the PT.
1467 */
1468 if ( fAllowRemoval
1469 && pGstPT->a[i].n.u1Present)
1470 {
1471 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1472 {
1473 *pfFlush = true;
1474 return ++cChanged;
1475 }
1476 }
1477 if (pShwPT->a[i].n.u1Present)
1478 {
1479 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1480 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1481 {
1482#ifdef VBOX_STRICT
1483 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1484 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1485 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1486#endif
1487 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1488 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1489 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1490 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1491
1492 if ( uHostAttr == uGuestAttr
1493 && fHostRW <= fGuestRW)
1494 continue;
1495 }
1496 cChanged++;
1497 /* Something was changed, so flush it. */
1498 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1499 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1500 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
1501 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1502 }
1503 }
1504 return cChanged;
1505}
1506
1507
1508/**
1509 * Flush a dirty page
1510 *
1511 * @param pVM VM Handle.
1512 * @param pPool The pool.
1513 * @param idxSlot Dirty array slot index
1514 * @param fAllowRemoval Allow a reused page table to be removed
1515 */
1516static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1517{
1518 PPGMPOOLPAGE pPage;
1519 unsigned idxPage;
1520
1521 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1522 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1523 return;
1524
1525 idxPage = pPool->aIdxDirtyPages[idxSlot];
1526 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1527 pPage = &pPool->aPages[idxPage];
1528 Assert(pPage->idx == idxPage);
1529 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1530
1531 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1532 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1533
1534 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1535 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1536 Assert(rc == VINF_SUCCESS);
1537 pPage->fDirty = false;
1538
1539#ifdef VBOX_STRICT
1540 uint64_t fFlags = 0;
1541 RTHCPHYS HCPhys;
1542 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1543 AssertMsg( ( rc == VINF_SUCCESS
1544 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1545 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1546 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1547 || rc == VERR_PAGE_NOT_PRESENT,
1548 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1549#endif
1550
1551 /* Flush those PTEs that have changed. */
1552 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1553 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1554 void *pvGst;
1555 bool fFlush;
1556 rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1557 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0], fAllowRemoval, &fFlush);
1558 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1559 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1560
1561 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1562 Assert(pPage->cModifications);
1563 if (cChanges < 4)
1564 pPage->cModifications = 1; /* must use > 0 here */
1565 else
1566 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1567
1568 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
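 /* If the dirty array was completely full, the slot just flushed becomes the next free slot. */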
1569 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1570 pPool->idxFreeDirtyPage = idxSlot;
1571
1572 pPool->cDirtyPages--;
1573 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1574 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1575 if (fFlush)
1576 {
1577 Assert(fAllowRemoval);
1578 Log(("Flush reused page table!\n"));
1579 pgmPoolFlushPage(pPool, pPage);
1580 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1581 }
1582 else
1583 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1584}
1585
1586# ifndef IN_RING3
1587/**
1588 * Adds a new dirty page.
1589 *
1590 * @param pVM VM Handle.
1591 * @param pPool The pool.
1592 * @param pPage The page.
1593 */
1594void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1595{
1596 unsigned idxFree;
1597
1598 Assert(PGMIsLocked(pVM));
1599 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1600 Assert(!pPage->fDirty);
1601
1602 idxFree = pPool->idxFreeDirtyPage;
1603 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1604 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1605
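 /* All slots taken: flush the page occupying the current free slot first (allowing a reused
    page table to be removed) so that a slot is guaranteed to be available below. */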
1606 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1607 {
1608 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1609 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1610 }
1611 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1612 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1613
1614 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1615
1616 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1617 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1618 */
1619 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1620 void *pvGst;
1621 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1622 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1623#ifdef VBOX_STRICT
1624 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1625#endif
1626
1627 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1628 pPage->fDirty = true;
1629 pPage->idxDirty = idxFree;
1630 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1631 pPool->cDirtyPages++;
1632
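 /* Advance the free-slot hint; the mask wrap-around relies on the array size being a power of
    two, which the AssertCompile above guarantees. */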
1633 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1634 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1635 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1636 {
1637 unsigned i;
1638 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1639 {
1640 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1641 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1642 {
1643 pPool->idxFreeDirtyPage = idxFree;
1644 break;
1645 }
1646 }
1647 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1648 }
1649
1650 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1651 return;
1652}
1653# endif /* !IN_RING3 */
1654
1655/**
1656 * Checks if the specified page is dirty (not write monitored).
1657 *
1658 * @returns true if dirty, false if not.
1659 * @param pVM VM Handle.
1660 * @param GCPhys Guest physical address.
1661 */
1662bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1663{
1664 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1665 Assert(PGMIsLocked(pVM));
1666 if (!pPool->cDirtyPages)
1667 return false;
1668
1669 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1670
1671 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1672 {
1673 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1674 {
1675 PPGMPOOLPAGE pPage;
1676 unsigned idxPage = pPool->aIdxDirtyPages[i];
1677
1678 pPage = &pPool->aPages[idxPage];
1679 if (pPage->GCPhys == GCPhys)
1680 return true;
1681 }
1682 }
1683 return false;
1684}
1685
1686/**
1687 * Reset all dirty pages by reinstating page monitoring.
1688 *
1689 * @param pVM VM Handle.
1690 */
1691void pgmPoolResetDirtyPages(PVM pVM)
1692{
1693 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1694 Assert(PGMIsLocked(pVM));
1695 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1696
1697 if (!pPool->cDirtyPages)
1698 return;
1699
1700 Log(("pgmPoolResetDirtyPages\n"));
1701 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1702 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1703
1704 pPool->idxFreeDirtyPage = 0;
1705 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1706 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1707 {
1708 unsigned i;
1709 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1710 {
1711 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1712 {
1713 pPool->idxFreeDirtyPage = i;
1714 break;
1715 }
1716 }
1717 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1718 }
1719
1720 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1721 return;
1722}
1723
1724/**
1725 * Flushes the given page table's dirty state (if it is in the dirty set), reinstating its write monitoring.
1726 *
1727 * @param pVM VM Handle.
1728 * @param GCPhysPT Physical address of the page table.
1729 */
1730void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1731{
1732 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1733 Assert(PGMIsLocked(pVM));
1734 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1735 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aIdxDirtyPages);
1736
1737 if (!pPool->cDirtyPages)
1738 return;
1739
1740 GCPhysPT = GCPhysPT & ~(RTGCPHYS)(PAGE_SIZE - 1);
1741
1742 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1743 {
1744 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1745 {
1746 unsigned idxPage = pPool->aIdxDirtyPages[i];
1747
1748 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1749 if (pPage->GCPhys == GCPhysPT)
1750 {
1751 idxDirtyPage = i;
1752 break;
1753 }
1754 }
1755 }
1756
1757 if (idxDirtyPage != RT_ELEMENTS(pPool->aIdxDirtyPages))
1758 {
1759 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1760 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1761 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1762 {
1763 unsigned i;
1764 for (i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1765 {
1766 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1767 {
1768 pPool->idxFreeDirtyPage = i;
1769 break;
1770 }
1771 }
1772 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1773 }
1774 }
1775}
1776
1777# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1778
1779/**
1780 * Inserts a page into the GCPhys hash table.
1781 *
1782 * @param pPool The pool.
1783 * @param pPage The page.
1784 */
1785DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1786{
1787 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1788 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1789 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1790 pPage->iNext = pPool->aiHash[iHash];
1791 pPool->aiHash[iHash] = pPage->idx;
1792}
1793
1794
1795/**
1796 * Removes a page from the GCPhys hash table.
1797 *
1798 * @param pPool The pool.
1799 * @param pPage The page.
1800 */
1801DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1802{
1803 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1804 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1805 if (pPool->aiHash[iHash] == pPage->idx)
1806 pPool->aiHash[iHash] = pPage->iNext;
1807 else
1808 {
1809 uint16_t iPrev = pPool->aiHash[iHash];
1810 for (;;)
1811 {
1812 const int16_t i = pPool->aPages[iPrev].iNext;
1813 if (i == pPage->idx)
1814 {
1815 pPool->aPages[iPrev].iNext = pPage->iNext;
1816 break;
1817 }
1818 if (i == NIL_PGMPOOL_IDX)
1819 {
1820 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1821 break;
1822 }
1823 iPrev = i;
1824 }
1825 }
1826 pPage->iNext = NIL_PGMPOOL_IDX;
1827}
1828
1829
1830/**
1831 * Frees up one cache page.
1832 *
1833 * @returns VBox status code.
1834 * @retval VINF_SUCCESS on success.
1835 * @param pPool The pool.
1836 * @param iUser The user index.
1837 */
1838static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1839{
1840#ifndef IN_RC
1841 const PVM pVM = pPool->CTX_SUFF(pVM);
1842#endif
1843 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1844 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1845
1846 /*
1847 * Select one page from the tail of the age list.
1848 */
1849 PPGMPOOLPAGE pPage;
1850 for (unsigned iLoop = 0; ; iLoop++)
1851 {
1852 uint16_t iToFree = pPool->iAgeTail;
1853 if (iToFree == iUser)
1854 iToFree = pPool->aPages[iToFree].iAgePrev;
1855/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1856 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1857 {
1858 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1859 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1860 {
1861 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1862 continue;
1863 iToFree = i;
1864 break;
1865 }
1866 }
1867*/
1868 Assert(iToFree != iUser);
1869 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1870 pPage = &pPool->aPages[iToFree];
1871
1872 /*
1873 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1874 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1875 */
1876 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1877 break;
1878 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1879 pgmPoolCacheUsed(pPool, pPage);
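 /* Guard against spinning forever if every page towards the tail of the age list is locked. */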
1880 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1881 }
1882
1883 /*
1884 * Found a usable page, flush it and return.
1885 */
1886 int rc = pgmPoolFlushPage(pPool, pPage);
1887 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1888 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
1889 if (rc == VINF_SUCCESS)
1890 PGM_INVL_ALL_VCPU_TLBS(pVM);
1891 return rc;
1892}
1893
1894
1895/**
1896 * Checks if a kind mismatch is really a page being reused
1897 * or if it's just normal remappings.
1898 *
1899 * @returns true if reused and the cached page (enmKind1) should be flushed
1900 * @returns false if not reused.
1901 * @param enmKind1 The kind of the cached page.
1902 * @param enmKind2 The kind of the requested page.
1903 */
1904static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1905{
1906 switch (enmKind1)
1907 {
1908 /*
1909 * Never reuse them. There is no remapping in non-paging mode.
1910 */
1911 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1912 case PGMPOOLKIND_32BIT_PD_PHYS:
1913 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1914 case PGMPOOLKIND_PAE_PD_PHYS:
1915 case PGMPOOLKIND_PAE_PDPT_PHYS:
1916 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1917 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1918 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1919 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1920 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1921 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1922 return false;
1923
1924 /*
1925 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1926 */
1927 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1928 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1929 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1930 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1931 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1932 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1933 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1934 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1935 case PGMPOOLKIND_32BIT_PD:
1936 case PGMPOOLKIND_PAE_PDPT:
1937 switch (enmKind2)
1938 {
1939 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1940 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1941 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1942 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1943 case PGMPOOLKIND_64BIT_PML4:
1944 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1945 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1946 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1947 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1948 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1949 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1950 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1951 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1952 return true;
1953 default:
1954 return false;
1955 }
1956
1957 /*
1958 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1959 */
1960 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1961 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1962 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1963 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1964 case PGMPOOLKIND_64BIT_PML4:
1965 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1966 switch (enmKind2)
1967 {
1968 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1969 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1970 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1971 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1972 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1973 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1974 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1975 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1976 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1977 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1978 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1979 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1980 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1981 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1982 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1983 return true;
1984 default:
1985 return false;
1986 }
1987
1988 /*
1989 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1990 */
1991 case PGMPOOLKIND_ROOT_NESTED:
1992 return false;
1993
1994 default:
1995 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1996 }
1997}
1998
1999
2000/**
2001 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2002 *
2003 * @returns VBox status code.
2004 * @retval VINF_PGM_CACHED_PAGE on success.
2005 * @retval VERR_FILE_NOT_FOUND if not found.
2006 * @param pPool The pool.
2007 * @param GCPhys The GC physical address of the page we're going to shadow.
2008 * @param enmKind The kind of mapping.
2009 * @param enmAccess Access type for the mapping (only relevant for big pages)
2010 * @param iUser The shadow page pool index of the user table.
2011 * @param iUserTable The index into the user table (shadowed).
2012 * @param ppPage Where to store the pointer to the page.
2013 */
2014static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2015{
2016#ifndef IN_RC
2017 const PVM pVM = pPool->CTX_SUFF(pVM);
2018#endif
2019 /*
2020 * Look up the GCPhys in the hash.
2021 */
2022 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2023 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2024 if (i != NIL_PGMPOOL_IDX)
2025 {
2026 do
2027 {
2028 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2029 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2030 if (pPage->GCPhys == GCPhys)
2031 {
2032 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2033 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2034 {
2035 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2036 * doesn't flush it in case there are no more free use records.
2037 */
2038 pgmPoolCacheUsed(pPool, pPage);
2039
2040 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2041 if (RT_SUCCESS(rc))
2042 {
2043 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2044 *ppPage = pPage;
2045 if (pPage->cModifications)
2046 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2047 STAM_COUNTER_INC(&pPool->StatCacheHits);
2048 return VINF_PGM_CACHED_PAGE;
2049 }
2050 return rc;
2051 }
2052
2053 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2054 {
2055 /*
2056 * The kind is different. In some cases we should now flush the page
2057 * as it has been reused, but in most cases this is normal remapping
2058 * of PDs as PT or big pages using the GCPhys field in a slightly
2059 * different way than the other kinds.
2060 */
2061 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2062 {
2063 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2064 pgmPoolFlushPage(pPool, pPage);
2065 break;
2066 }
2067 }
2068 }
2069
2070 /* next */
2071 i = pPage->iNext;
2072 } while (i != NIL_PGMPOOL_IDX);
2073 }
2074
2075 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2076 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2077 return VERR_FILE_NOT_FOUND;
2078}
2079
2080
2081/**
2082 * Inserts a page into the cache.
2083 *
2084 * @param pPool The pool.
2085 * @param pPage The cached page.
2086 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2087 */
2088static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2089{
2090 /*
2091 * Insert into the GCPhys hash if the page is fit for that.
2092 */
2093 Assert(!pPage->fCached);
2094 if (fCanBeCached)
2095 {
2096 pPage->fCached = true;
2097 pgmPoolHashInsert(pPool, pPage);
2098 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2099 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2100 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2101 }
2102 else
2103 {
2104 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2105 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2106 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2107 }
2108
2109 /*
2110 * Insert at the head of the age list.
2111 */
2112 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2113 pPage->iAgeNext = pPool->iAgeHead;
2114 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2115 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2116 else
2117 pPool->iAgeTail = pPage->idx;
2118 pPool->iAgeHead = pPage->idx;
2119}
2120
2121
2122/**
2123 * Flushes a cached page.
2124 *
2125 * @param pPool The pool.
2126 * @param pPage The cached page.
2127 */
2128static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2129{
2130 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2131
2132 /*
2133 * Remove the page from the hash.
2134 */
2135 if (pPage->fCached)
2136 {
2137 pPage->fCached = false;
2138 pgmPoolHashRemove(pPool, pPage);
2139 }
2140 else
2141 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2142
2143 /*
2144 * Remove it from the age list.
2145 */
2146 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2147 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2148 else
2149 pPool->iAgeTail = pPage->iAgePrev;
2150 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2151 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2152 else
2153 pPool->iAgeHead = pPage->iAgeNext;
2154 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2155 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2156}
2157
2158
2159/**
2160 * Looks for pages sharing the monitor.
2161 *
2162 * @returns Pointer to the head page.
2163 * @returns NULL if not found.
2164 * @param pPool The pool.
2165 * @param pNewPage The page which is going to be monitored.
2166 */
2167static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2168{
2169 /*
2170 * Look up the GCPhys in the hash.
2171 */
2172 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2173 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2174 if (i == NIL_PGMPOOL_IDX)
2175 return NULL;
2176 do
2177 {
2178 PPGMPOOLPAGE pPage = &pPool->aPages[i];
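 /* Unsigned compare: matches any pool page whose GCPhys falls within the same guest page as
    GCPhys (masked to the page start above), presumably because some kinds keep a sub-page
    offset in GCPhys. */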
2179 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2180 && pPage != pNewPage)
2181 {
2182 switch (pPage->enmKind)
2183 {
2184 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2185 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2186 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2187 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2188 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2189 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2190 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2191 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2192 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2193 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2194 case PGMPOOLKIND_64BIT_PML4:
2195 case PGMPOOLKIND_32BIT_PD:
2196 case PGMPOOLKIND_PAE_PDPT:
2197 {
2198 /* find the head */
2199 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2200 {
2201 Assert(pPage->iMonitoredPrev != pPage->idx);
2202 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2203 }
2204 return pPage;
2205 }
2206
2207 /* ignore, no monitoring. */
2208 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2209 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2210 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2211 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2212 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2213 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2214 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2215 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2216 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2217 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2218 case PGMPOOLKIND_ROOT_NESTED:
2219 case PGMPOOLKIND_PAE_PD_PHYS:
2220 case PGMPOOLKIND_PAE_PDPT_PHYS:
2221 case PGMPOOLKIND_32BIT_PD_PHYS:
2222 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2223 break;
2224 default:
2225 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2226 }
2227 }
2228
2229 /* next */
2230 i = pPage->iNext;
2231 } while (i != NIL_PGMPOOL_IDX);
2232 return NULL;
2233}
2234
2235
2236/**
2237 * Enables write monitoring of a guest page.
2238 *
2239 * @returns VBox status code.
2240 * @retval VINF_SUCCESS on success.
2241 * @param pPool The pool.
2242 * @param pPage The cached page.
2243 */
2244static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2245{
2246 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2247
2248 /*
2249 * Filter out the relevant kinds.
2250 */
2251 switch (pPage->enmKind)
2252 {
2253 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2254 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2255 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2256 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2257 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2258 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2259 case PGMPOOLKIND_64BIT_PML4:
2260 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2261 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2262 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2263 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2264 case PGMPOOLKIND_32BIT_PD:
2265 case PGMPOOLKIND_PAE_PDPT:
2266 break;
2267
2268 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2269 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2270 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2271 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2272 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2273 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2274 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2275 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2276 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2277 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2278 case PGMPOOLKIND_ROOT_NESTED:
2279 /* Nothing to monitor here. */
2280 return VINF_SUCCESS;
2281
2282 case PGMPOOLKIND_32BIT_PD_PHYS:
2283 case PGMPOOLKIND_PAE_PDPT_PHYS:
2284 case PGMPOOLKIND_PAE_PD_PHYS:
2285 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2286 /* Nothing to monitor here. */
2287 return VINF_SUCCESS;
2288 default:
2289 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2290 }
2291
2292 /*
2293 * Install handler.
2294 */
2295 int rc;
2296 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2297 if (pPageHead)
2298 {
2299 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2300 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2301
2302#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2303 if (pPageHead->fDirty)
2304 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2305#endif
2306
2307 pPage->iMonitoredPrev = pPageHead->idx;
2308 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2309 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2310 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2311 pPageHead->iMonitoredNext = pPage->idx;
2312 rc = VINF_SUCCESS;
2313 }
2314 else
2315 {
2316 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2317 PVM pVM = pPool->CTX_SUFF(pVM);
2318 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2319 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2320 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2321 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2322 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2323 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2324 pPool->pszAccessHandler);
2325 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2326 * the heap size should suffice. */
2327 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2328 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
2329 }
2330 pPage->fMonitored = true;
2331 return rc;
2332}
2333
2334
2335/**
2336 * Disables write monitoring of a guest page.
2337 *
2338 * @returns VBox status code.
2339 * @retval VINF_SUCCESS on success.
2340 * @param pPool The pool.
2341 * @param pPage The cached page.
2342 */
2343static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2344{
2345 /*
2346 * Filter out the relevant kinds.
2347 */
2348 switch (pPage->enmKind)
2349 {
2350 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2351 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2352 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2353 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2354 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2355 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2356 case PGMPOOLKIND_64BIT_PML4:
2357 case PGMPOOLKIND_32BIT_PD:
2358 case PGMPOOLKIND_PAE_PDPT:
2359 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2360 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2361 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2362 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2363 break;
2364
2365 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2366 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2367 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2368 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2369 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2370 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2371 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2372 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2373 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2374 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2375 case PGMPOOLKIND_ROOT_NESTED:
2376 case PGMPOOLKIND_PAE_PD_PHYS:
2377 case PGMPOOLKIND_PAE_PDPT_PHYS:
2378 case PGMPOOLKIND_32BIT_PD_PHYS:
2379 /* Nothing to monitor here. */
2380 return VINF_SUCCESS;
2381
2382 default:
2383 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2384 }
2385
2386 /*
2387 * Remove the page from the monitored list or uninstall it if last.
2388 */
2389 const PVM pVM = pPool->CTX_SUFF(pVM);
2390 int rc;
2391 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2392 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2393 {
2394 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2395 {
2396 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2397 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2398 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2399 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2400 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2401 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2402 pPool->pszAccessHandler);
2403 AssertFatalRCSuccess(rc);
2404 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2405 }
2406 else
2407 {
2408 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2409 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2410 {
2411 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2412 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2413 }
2414 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2415 rc = VINF_SUCCESS;
2416 }
2417 }
2418 else
2419 {
2420 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2421 AssertFatalRC(rc);
2422#ifdef VBOX_STRICT
2423 PVMCPU pVCpu = VMMGetCpu(pVM);
2424#endif
2425 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2426 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2427 }
2428 pPage->fMonitored = false;
2429
2430 /*
2431 * Remove it from the list of modified pages (if in it).
2432 */
2433 pgmPoolMonitorModifiedRemove(pPool, pPage);
2434
2435 return rc;
2436}
2437
2438
2439/**
2440 * Inserts the page into the list of modified pages.
2441 *
2442 * @param pPool The pool.
2443 * @param pPage The page.
2444 */
2445void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2446{
2447 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2448 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2449 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2450 && pPool->iModifiedHead != pPage->idx,
2451 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2452 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2453 pPool->iModifiedHead, pPool->cModifiedPages));
2454
2455 pPage->iModifiedNext = pPool->iModifiedHead;
2456 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2457 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2458 pPool->iModifiedHead = pPage->idx;
2459 pPool->cModifiedPages++;
2460#ifdef VBOX_WITH_STATISTICS
2461 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2462 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2463#endif
2464}
2465
2466
2467/**
2468 * Removes the page from the list of modified pages and resets the
2469 * modification counter.
2470 *
2471 * @param pPool The pool.
2472 * @param pPage The page which is believed to be in the list of modified pages.
2473 */
2474static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2475{
2476 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2477 if (pPool->iModifiedHead == pPage->idx)
2478 {
2479 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2480 pPool->iModifiedHead = pPage->iModifiedNext;
2481 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2482 {
2483 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2484 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2485 }
2486 pPool->cModifiedPages--;
2487 }
2488 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2489 {
2490 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2491 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2492 {
2493 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2494 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2495 }
2496 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2497 pPool->cModifiedPages--;
2498 }
2499 else
2500 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2501 pPage->cModifications = 0;
2502}
2503
2504
2505/**
2506 * Zaps the list of modified pages, resetting their modification counters in the process.
2507 *
2508 * @param pVM The VM handle.
2509 */
2510static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2511{
2512 pgmLock(pVM);
2513 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2514 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2515
2516 unsigned cPages = 0; NOREF(cPages);
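 /* cPages only tallies list entries for the cross-check assertion after the loop. */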
2517
2518#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2519 pgmPoolResetDirtyPages(pVM);
2520#endif
2521
2522 uint16_t idx = pPool->iModifiedHead;
2523 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2524 while (idx != NIL_PGMPOOL_IDX)
2525 {
2526 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2527 idx = pPage->iModifiedNext;
2528 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2529 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2530 pPage->cModifications = 0;
2531 Assert(++cPages);
2532 }
2533 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2534 pPool->cModifiedPages = 0;
2535 pgmUnlock(pVM);
2536}
2537
2538
2539/**
2540 * Handles SyncCR3 pool tasks.
2541 *
2542 * @returns VBox status code.
2543 * @retval VINF_SUCCESS on success.
2544 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2545 * @param pVCpu The VMCPU handle.
2546 * @remark Should only be used when monitoring is available, thus placed in
2547 * the PGMPOOL_WITH_MONITORING #ifdef.
2548 */
2549int pgmPoolSyncCR3(PVMCPU pVCpu)
2550{
2551 PVM pVM = pVCpu->CTX_SUFF(pVM);
2552 LogFlow(("pgmPoolSyncCR3\n"));
2553
2554 /*
2555 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2556 * Occasionally we will have to clear all the shadow page tables because we wanted
2557 * to monitor a page which was mapped by too many shadowed page tables. This operation
2558 * is sometimes referred to as a 'lightweight flush'.
2559 */
2560# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2561 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2562 pgmR3PoolClearAll(pVM);
2563# else /* !IN_RING3 */
2564 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2565 {
2566 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2567 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2568
2569 /* Make sure all other VCPUs return to ring 3. */
2570 if (pVM->cCpus > 1)
2571 {
2572 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2573 PGM_INVL_ALL_VCPU_TLBS(pVM);
2574 }
2575 return VINF_PGM_SYNC_CR3;
2576 }
2577# endif /* !IN_RING3 */
2578 else
2579 pgmPoolMonitorModifiedClearAll(pVM);
2580
2581 return VINF_SUCCESS;
2582}
2583
2584
2585/**
2586 * Frees up at least one user entry.
2587 *
2588 * @returns VBox status code.
2589 * @retval VINF_SUCCESS on success.
2590 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2591 * @param pPool The pool.
2592 * @param iUser The user index.
2593 */
2594static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2595{
2596 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2597 /*
2598 * Just free cached pages in a braindead fashion.
2599 */
2600 /** @todo walk the age list backwards and free the first with usage. */
2601 int rc = VINF_SUCCESS;
2602 do
2603 {
2604 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2605 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2606 rc = rc2;
2607 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2608 return rc;
2609}
2610
2611
2612/**
2613 * Inserts a page into the cache.
2614 *
2615 * This will create a user node for the page, insert it into the GCPhys
2616 * hash, and insert it into the age list.
2617 *
2618 * @returns VBox status code.
2619 * @retval VINF_SUCCESS if successfully added.
2620 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2621 * @param pPool The pool.
2622 * @param pPage The cached page.
2623 * @param GCPhys The GC physical address of the page we're going to shadow.
2624 * @param iUser The user index.
2625 * @param iUserTable The user table index.
2626 */
2627DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2628{
2629 int rc = VINF_SUCCESS;
2630 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2631
2632 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2633
2634#ifdef VBOX_STRICT
2635 /*
2636 * Check that the entry doesn't already exist.
2637 */
2638 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2639 {
2640 uint16_t i = pPage->iUserHead;
2641 do
2642 {
2643 Assert(i < pPool->cMaxUsers);
2644 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2645 i = paUsers[i].iNext;
2646 } while (i != NIL_PGMPOOL_USER_INDEX);
2647 }
2648#endif
2649
2650 /*
2651 * Find a free user node.
2652 */
2653 uint16_t i = pPool->iUserFreeHead;
2654 if (i == NIL_PGMPOOL_USER_INDEX)
2655 {
2656 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2657 if (RT_FAILURE(rc))
2658 return rc;
2659 i = pPool->iUserFreeHead;
2660 }
2661
2662 /*
2663 * Unlink the user node from the free list,
2664 * initialize and insert it into the user list.
2665 */
2666 pPool->iUserFreeHead = paUsers[i].iNext;
2667 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2668 paUsers[i].iUser = iUser;
2669 paUsers[i].iUserTable = iUserTable;
2670 pPage->iUserHead = i;
2671
2672 /*
2673 * Insert into cache and enable monitoring of the guest page if enabled.
2674 *
2675 * Until we implement caching of all levels, including the CR3 one, we'll
2676 * have to make sure we don't try to monitor & cache any recursive reuse of
2677 * a monitored CR3 page. Because all Windows versions do this, we'll
2678 * have to be able to do combined access monitoring, CR3 + PT and
2679 * PD + PT (guest PAE).
2680 *
2681 * Update:
2682 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2683 */
2684 const bool fCanBeMonitored = true;
2685 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2686 if (fCanBeMonitored)
2687 {
2688 rc = pgmPoolMonitorInsert(pPool, pPage);
2689 AssertRC(rc);
2690 }
2691 return rc;
2692}
2693
2694
2695/**
2696 * Adds a user reference to a page.
2697 *
2698 * This will move the page to the head of the age list.
2699 *
2700 * @returns VBox status code.
2701 * @retval VINF_SUCCESS if successfully added.
2702 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2703 * @param pPool The pool.
2704 * @param pPage The cached page.
2705 * @param iUser The user index.
2706 * @param iUserTable The user table.
2707 */
2708static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2709{
2710 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2711
2712 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2713
2714# ifdef VBOX_STRICT
2715 /*
2716 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2717 */
2718 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2719 {
2720 uint16_t i = pPage->iUserHead;
2721 do
2722 {
2723 Assert(i < pPool->cMaxUsers);
2724 AssertMsg(iUser == PGMPOOL_IDX_PD || iUser == PGMPOOL_IDX_PDPT || iUser == PGMPOOL_IDX_NESTED_ROOT || iUser == PGMPOOL_IDX_AMD64_CR3 ||
2725 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2726 i = paUsers[i].iNext;
2727 } while (i != NIL_PGMPOOL_USER_INDEX);
2728 }
2729# endif
2730
2731 /*
2732 * Allocate a user node.
2733 */
2734 uint16_t i = pPool->iUserFreeHead;
2735 if (i == NIL_PGMPOOL_USER_INDEX)
2736 {
2737 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2738 if (RT_FAILURE(rc))
2739 return rc;
2740 i = pPool->iUserFreeHead;
2741 }
2742 pPool->iUserFreeHead = paUsers[i].iNext;
2743
2744 /*
2745 * Initialize the user node and insert it.
2746 */
2747 paUsers[i].iNext = pPage->iUserHead;
2748 paUsers[i].iUser = iUser;
2749 paUsers[i].iUserTable = iUserTable;
2750 pPage->iUserHead = i;
2751
2752# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2753 if (pPage->fDirty)
2754 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2755# endif
2756
2757 /*
2758 * Tell the cache to update its replacement stats for this page.
2759 */
2760 pgmPoolCacheUsed(pPool, pPage);
2761 return VINF_SUCCESS;
2762}
2763
2764
2765/**
2766 * Frees a user record associated with a page.
2767 *
2768 * This does not clear the entry in the user table; it simply returns the
2769 * user record to the chain of free records.
2770 *
2771 * @param pPool The pool.
2772 * @param pPage The shadow page.
2773 * @param iUser The shadow page pool index of the user table.
2774 * @param iUserTable The index into the user table (shadowed).
2775 */
2776static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2777{
2778 /*
2779 * Unlink and free the specified user entry.
2780 */
2781 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2782
2783 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2784 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2785 uint16_t i = pPage->iUserHead;
2786 if ( i != NIL_PGMPOOL_USER_INDEX
2787 && paUsers[i].iUser == iUser
2788 && paUsers[i].iUserTable == iUserTable)
2789 {
2790 pPage->iUserHead = paUsers[i].iNext;
2791
2792 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2793 paUsers[i].iNext = pPool->iUserFreeHead;
2794 pPool->iUserFreeHead = i;
2795 return;
2796 }
2797
2798 /* General: Linear search. */
2799 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2800 while (i != NIL_PGMPOOL_USER_INDEX)
2801 {
2802 if ( paUsers[i].iUser == iUser
2803 && paUsers[i].iUserTable == iUserTable)
2804 {
2805 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2806 paUsers[iPrev].iNext = paUsers[i].iNext;
2807 else
2808 pPage->iUserHead = paUsers[i].iNext;
2809
2810 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2811 paUsers[i].iNext = pPool->iUserFreeHead;
2812 pPool->iUserFreeHead = i;
2813 return;
2814 }
2815 iPrev = i;
2816 i = paUsers[i].iNext;
2817 }
2818
2819 /* Fatal: didn't find it */
2820 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2821 iUser, iUserTable, pPage->GCPhys));
2822}
2823
2824
2825/**
2826 * Gets the entry size of a shadow table.
2827 *
2828 * @param enmKind The kind of page.
2829 *
2830 * @returns The size of the entry in bytes. That is, 4 or 8.
2831 * @returns If the kind is not for a table, a fatal assertion is raised.
2833 */
2834DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2835{
2836 switch (enmKind)
2837 {
2838 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2839 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2840 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2841 case PGMPOOLKIND_32BIT_PD:
2842 case PGMPOOLKIND_32BIT_PD_PHYS:
2843 return 4;
2844
2845 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2846 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2847 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2848 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2849 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2850 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2851 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2852 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2853 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2854 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2855 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2856 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2857 case PGMPOOLKIND_64BIT_PML4:
2858 case PGMPOOLKIND_PAE_PDPT:
2859 case PGMPOOLKIND_ROOT_NESTED:
2860 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2861 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2862 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2863 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2864 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2865 case PGMPOOLKIND_PAE_PD_PHYS:
2866 case PGMPOOLKIND_PAE_PDPT_PHYS:
2867 return 8;
2868
2869 default:
2870 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2871 }
2872}
2873
2874
2875/**
2876 * Gets the entry size of a guest table.
2877 *
2878 * @param enmKind The kind of page.
2879 *
2880 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2881 * @returns If the kind is not for a table, an assertion is raised and 0 is
2882 * returned.
2883 */
2884DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2885{
2886 switch (enmKind)
2887 {
2888 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2889 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2890 case PGMPOOLKIND_32BIT_PD:
2891 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2892 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2893 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2894 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2895 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2896 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2897 return 4;
2898
2899 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2900 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2901 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2902 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2903 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2904 case PGMPOOLKIND_64BIT_PML4:
2905 case PGMPOOLKIND_PAE_PDPT:
2906 return 8;
2907
2908 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2909 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2910 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2911 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2912 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2913 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2914 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2915 case PGMPOOLKIND_ROOT_NESTED:
2916 case PGMPOOLKIND_PAE_PD_PHYS:
2917 case PGMPOOLKIND_PAE_PDPT_PHYS:
2918 case PGMPOOLKIND_32BIT_PD_PHYS:
2919 /** @todo can we return 0? (nobody is calling this...) */
2920 AssertFailed();
2921 return 0;
2922
2923 default:
2924 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2925 }
2926}
2927
2928
2929/**
2930 * Scans one shadow page table for mappings of a physical page.
2931 *
2932 * @returns true/false indicating removal of all relevant PTEs
2933 * @param pVM The VM handle.
2934 * @param pPhysPage The guest page in question.
2935 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
2936 * @param iShw The shadow page table.
2937 * @param cRefs The number of references made in that PT.
2938 */
2939static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
2940{
2941 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
2942 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2943 bool bRet = false;
2944
2945 /*
2946 * Assert sanity.
2947 */
2948 Assert(cRefs == 1);
2949 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2950 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2951
2952 /*
2953 * Then, clear the actual mappings to the page in the shadow PT.
2954 */
2955 switch (pPage->enmKind)
2956 {
2957 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2958 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2959 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2960 {
2961 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2962 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2963 uint32_t u32AndMask, u32OrMask;
2964
2965 u32AndMask = 0;
2966 u32OrMask = 0;
2967
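 /* If both masks stay zero the PTE is cleared outright below; otherwise the entry is kept and
    only its RW bit is adjusted according to the physical handler state. */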
2968 if (!fFlushPTEs)
2969 {
2970 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
2971 {
2972 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
2973 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
2974 u32OrMask = X86_PTE_RW;
2975 u32AndMask = UINT32_MAX;
2976 bRet = true;
2977 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2978 break;
2979
2980 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
2981 u32OrMask = 0;
2982 u32AndMask = ~X86_PTE_RW;
2983 bRet = true;
2984 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2985 break;
2986 default:
2987 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
2988 break;
2989 }
2990 }
2991 else
2992 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
2993
2994 /* Update the counter if we're removing references. */
2995 if (!u32AndMask)
2996 {
2997 Assert(pPage->cPresent >= cRefs);
2998 Assert(pPool->cPresent >= cRefs);
2999 pPage->cPresent -= cRefs;
3000 pPool->cPresent -= cRefs;
3001 }
3002
3003 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3004 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3005 {
3006 X86PTE Pte;
3007
3008 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
3009 Pte.u = (pPT->a[i].u & u32AndMask) | u32OrMask;
3010 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3011 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3012
3013 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3014 cRefs--;
3015 if (!cRefs)
3016 return bRet;
3017 }
3018#ifdef LOG_ENABLED
3019 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3020 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3021 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3022 {
3023 Log(("i=%d cRefs=%d\n", i, cRefs--));
3024 }
3025#endif
3026 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3027 break;
3028 }
3029
3030 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3031 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3032 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3033 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3034 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3035 {
3036 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3037 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3038 uint64_t u64AndMask, u64OrMask;
3039
3040 u64OrMask = 0;
3041 u64AndMask = 0;
3042 if (!fFlushPTEs)
3043 {
3044 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3045 {
3046 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3047 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3048 u64OrMask = X86_PTE_RW;
3049 u64AndMask = UINT64_MAX;
3050 bRet = true;
3051 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3052 break;
3053
3054 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3055 u64OrMask = 0;
3056 u64AndMask = ~((uint64_t)X86_PTE_RW);
3057 bRet = true;
3058 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3059 break;
3060
3061 default:
3062 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3063 break;
3064 }
3065 }
3066 else
3067 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3068
3069 /* Update the counter if we're removing references. */
3070 if (!u64AndMask)
3071 {
3072 Assert(pPage->cPresent >= cRefs);
3073 Assert(pPool->cPresent >= cRefs);
3074 pPage->cPresent -= cRefs;
3075 pPool->cPresent -= cRefs;
3076 }
3077
3078 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3079 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3080 {
3081 X86PTEPAE Pte;
3082
3083 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3084 Pte.u = (pPT->a[i].u & u64AndMask) | u64OrMask;
3085 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3086 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3087
3088 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3089 cRefs--;
3090 if (!cRefs)
3091 return bRet;
3092 }
3093#ifdef LOG_ENABLED
3094 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3095 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3096 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3097 {
3098 Log(("i=%d cRefs=%d\n", i, cRefs--));
3099 }
3100#endif
3101 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
3102 break;
3103 }
3104
3105 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3106 {
3107 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3108 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3109 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3110 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3111 {
3112 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3113 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3114 pPT->a[i].u = 0;
3115 cRefs--;
3116
3117 /* Update the counter as we're removing references. */
3118 Assert(pPage->cPresent);
3119 Assert(pPool->cPresent);
3120 pPage->cPresent--;
3121 pPool->cPresent--;
3122
3123 if (!cRefs)
3124 return bRet;
3125 }
3126#ifdef LOG_ENABLED
3127 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3128 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3129 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3130 {
3131 Log(("i=%d cRefs=%d\n", i, cRefs--));
3132 }
3133#endif
3134 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3135 break;
3136 }
3137
3138#ifdef PGM_WITH_LARGE_PAGES
3139 /* Large page case only. */
3140 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3141 {
3142 Assert(HWACCMIsNestedPagingActive(pVM));
3143 Assert(cRefs == 1);
3144
3145 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3146 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3147 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPD->a); i++)
3148 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3149 {
3150 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", i, pPD->a[i], cRefs));
3151 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3152 pPD->a[i].u = 0;
3153 cRefs--;
3154
3155 /* Update the counter as we're removing references. */
3156 Assert(pPage->cPresent);
3157 Assert(pPool->cPresent);
3158 pPage->cPresent--;
3159 pPool->cPresent--;
3160
3161 if (!cRefs)
3162 return bRet;
3163 }
3164# ifdef LOG_ENABLED
3165 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3166 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3167 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3168 {
3169 Log(("i=%d cRefs=%d\n", i, cRefs--));
3170 }
3171# endif
3172 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3173 break;
3174 }
3175
3176 /* AMD-V nested paging - @todo merge with EPT as we only check the parts that are identical. */
3177 case PGMPOOLKIND_PAE_PD_PHYS:
3178 {
3179 Assert(HWACCMIsNestedPagingActive(pVM));
3180 Assert(cRefs == 1);
3181
3182 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3183 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3184 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPD->a); i++)
3185 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3186 {
3187 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64 cRefs=%#x\n", i, pPD->a[i], cRefs));
3188 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3189 pPD->a[i].u = 0;
3190 cRefs--;
3191
3192 /* Update the counter as we're removing references. */
3193 Assert(pPage->cPresent);
3194 Assert(pPool->cPresent);
3195 pPage->cPresent--;
3196 pPool->cPresent--;
3197
3198 if (!cRefs)
3199 return bRet;
3200 }
3201# ifdef LOG_ENABLED
3202 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3203 for (unsigned i = 0; i < RT_ELEMENTS(pPD->a); i++)
3204 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3205 {
3206 Log(("i=%d cRefs=%d\n", i, cRefs--));
3207 }
3208# endif
3209 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3210 break;
3211 }
3212#endif /* PGM_WITH_LARGE_PAGES */
3213
3214 default:
3215 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3216 }
3217 return bRet;
3218}
3219
3220
3221/**
3222 * Scans one shadow page table for mappings of a physical page.
3223 *
3224 * @param pVM The VM handle.
3225 * @param pPhysPage The guest page in question.
3226 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3227 * @param iShw The shadow page table.
3228 * @param cRefs The number of references made in that PT.
3229 */
3230static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
3231{
3232 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3233
3234 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
3235 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3236 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, cRefs);
3237 if (!fKeptPTEs)
3238 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3239 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3240}
3241
3242
3243/**
3244 * Flushes a list of shadow page tables mapping the same physical page.
3245 *
3246 * @param pVM The VM handle.
3247 * @param pPhysPage The guest page in question.
3248 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3249 * @param iPhysExt The physical cross reference extent list to flush.
3250 */
3251static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3252{
3253 Assert(PGMIsLockOwner(pVM));
3254 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3255 bool fKeepList = false;
3256
3257 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3258    Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3259
3260 const uint16_t iPhysExtStart = iPhysExt;
3261 PPGMPOOLPHYSEXT pPhysExt;
3262 do
3263 {
3264 Assert(iPhysExt < pPool->cMaxPhysExts);
3265 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3266 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3267 {
3268 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3269 {
3270 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], 1);
3271 if (!fKeptPTEs)
3272 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3273 else
3274 fKeepList = true;
3275 }
3276 }
3277 /* next */
3278 iPhysExt = pPhysExt->iNext;
3279 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3280
3281 if (!fKeepList)
3282 {
3283 /* insert the list into the free list and clear the ram range entry. */
3284 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3285 pPool->iPhysExtFreeHead = iPhysExtStart;
3286 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3287 }
3288
3289 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3290}
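
/*
 * Editor's note (not part of the original source): PGMPOOLPHYSEXT, declared in
 * PGMInternal.h, is a small node carrying a few shadow page pool indices in its
 * aidx[] array plus an iNext link.  A guest physical page referenced by more
 * shadow page tables than the 16-bit PGMPAGE tracking word can describe is thus
 * represented by a chain of such nodes, terminated by NIL_PGMPOOL_PHYSEXT_INDEX,
 * which is what the loop above walks.
 */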
3291
3292
3293/**
3294 * Flushes all shadow page table mappings of the given guest page.
3295 *
3296 * This is typically called when the host page backing the guest one has been
3297 * replaced or when the page protection was changed due to an access handler.
3298 *
3299 * @returns VBox status code.
3300 * @retval VINF_SUCCESS if all references has been successfully cleared.
3301 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3302 * pool cleaning. FF and sync flags are set.
3303 *
3304 * @param pVM The VM handle.
3305 * @param GCPhysPage GC physical address of the page in question
3306 * @param pPhysPage The guest page in question.
3307 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3308 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3309 * flushed, it is NOT touched if this isn't necessary.
3310 *                       The caller MUST initialize this to @a false.
3311 */
3312int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3313{
3314 PVMCPU pVCpu = VMMGetCpu(pVM);
3315 pgmLock(pVM);
3316 int rc = VINF_SUCCESS;
3317
3318#ifdef PGM_WITH_LARGE_PAGES
3319 /* Is this page part of a large page? */
3320 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3321 {
3322 PPGMPAGE pPhysBase;
3323 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3324
3325 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3326
3327 /* Fetch the large page base. */
3328 if (GCPhysBase != GCPhysPage)
3329 {
3330 pPhysBase = pgmPhysGetPage(&pVM->pgm.s, GCPhysBase);
3331 AssertFatal(pPhysBase);
3332 }
3333 else
3334 pPhysBase = pPhysPage;
3335
3336 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3337
3338 if (PGM_PAGE_GET_PDE_TYPE(pPhysBase) == PGM_PAGE_PDE_TYPE_PDE)
3339 {
3340 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3341 PGM_PAGE_SET_PDE_TYPE(pPhysBase, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3342
3343            /* Update the base as only that one has a reference and there's only one PDE to clear. */
3344 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pPhysBase, fFlushPTEs, pfFlushTLBs);
3345
3346 *pfFlushTLBs = true;
3347 pgmUnlock(pVM);
3348 return rc;
3349 }
3350 }
3351#else
3352 NOREF(GCPhysPage);
3353#endif /* PGM_WITH_LARGE_PAGES */
3354
3355 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3356 if (u16)
3357 {
3358 /*
3359 * The zero page is currently screwing up the tracking and we'll
3360 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3361 * is defined, zero pages won't normally be mapped. Some kind of solution
3362 * will be needed for this problem of course, but it will have to wait...
3363 */
3364 if (PGM_PAGE_IS_ZERO(pPhysPage))
3365 rc = VINF_PGM_GCPHYS_ALIASED;
3366 else
3367 {
3368# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3369 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3370 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3371 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3372# endif
3373
3374 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3375 pgmPoolTrackFlushGCPhysPT(pVM,
3376 pPhysPage,
3377 fFlushPTEs,
3378 PGMPOOL_TD_GET_IDX(u16),
3379 PGMPOOL_TD_GET_CREFS(u16));
3380 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3381 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3382 else
3383 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3384 *pfFlushTLBs = true;
3385
3386# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3387 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3388# endif
3389 }
3390 }
3391
3392 if (rc == VINF_PGM_GCPHYS_ALIASED)
3393 {
3394 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3395 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3396 rc = VINF_PGM_SYNC_CR3;
3397 }
3398 pgmUnlock(pVM);
3399 return rc;
3400}
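
/*
 * Illustrative sketch (not part of the original source): roughly how a caller
 * might drive pgmPoolTrackUpdateGCPhys.  The surrounding variables are assumed
 * to exist in the caller's context; only functions and macros used elsewhere in
 * this file appear here.
 */
#if 0 /* example only */
    bool fFlushTLBs = false;    /* the callee only ever sets this, so it MUST start out false */
    int rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysPage, pPhysPage, true /*fFlushPTEs*/, &fFlushTLBs);
    if (fFlushTLBs)
        PGM_INVL_ALL_VCPU_TLBS(pVM);
    if (rc == VINF_PGM_SYNC_CR3)
    {
        /* The FF and sync flags are already set; a CR3 sync with pool clearing will follow. */
    }
#endif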
3401
3402
3403/**
3404 * Scans all shadow page tables for mappings of a physical page.
3405 *
3406 * This may be slow, but it's most likely more efficient than cleaning
3407 * out the entire page pool / cache.
3408 *
3409 * @returns VBox status code.
3410 * @retval VINF_SUCCESS if all references has been successfully cleared.
3411 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3412 * a page pool cleaning.
3413 *
3414 * @param pVM The VM handle.
3415 * @param pPhysPage The guest page in question.
3416 */
3417int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3418{
3419 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3420 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3421 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3422 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3423
3424#if 1
3425 /*
3426 * There is a limit to what makes sense.
3427 */
3428 if (pPool->cPresent > 1024)
3429 {
3430 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3431 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3432 return VINF_PGM_GCPHYS_ALIASED;
3433 }
3434#endif
3435
3436 /*
3437     * Iterate all the pages until we've encountered all that are in use.
3438     * This is a simple but not quite optimal solution.
3439 */
3440 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3441 const uint32_t u32 = u64;
3442 unsigned cLeft = pPool->cUsedPages;
3443 unsigned iPage = pPool->cCurPages;
3444 while (--iPage >= PGMPOOL_IDX_FIRST)
3445 {
3446 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3447 if (pPage->GCPhys != NIL_RTGCPHYS)
3448 {
3449 switch (pPage->enmKind)
3450 {
3451 /*
3452 * We only care about shadow page tables.
3453 */
3454 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3455 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3456 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3457 {
3458 unsigned cPresent = pPage->cPresent;
3459 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3460 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3461 if (pPT->a[i].n.u1Present)
3462 {
3463 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3464 {
3465 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3466 pPT->a[i].u = 0;
3467
3468 /* Update the counter as we're removing references. */
3469 Assert(pPage->cPresent);
3470 Assert(pPool->cPresent);
3471 pPage->cPresent--;
3472 pPool->cPresent--;
3473 }
3474 if (!--cPresent)
3475 break;
3476 }
3477 break;
3478 }
3479
3480 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3481 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3482 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3483 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3484 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3485 {
3486 unsigned cPresent = pPage->cPresent;
3487 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3488 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3489 if (pPT->a[i].n.u1Present)
3490 {
3491 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3492 {
3493 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3494 pPT->a[i].u = 0;
3495
3496 /* Update the counter as we're removing references. */
3497 Assert(pPage->cPresent);
3498 Assert(pPool->cPresent);
3499 pPage->cPresent--;
3500 pPool->cPresent--;
3501 }
3502 if (!--cPresent)
3503 break;
3504 }
3505 break;
3506 }
3507#ifndef IN_RC
3508 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3509 {
3510 unsigned cPresent = pPage->cPresent;
3511 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3512 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3513 if (pPT->a[i].n.u1Present)
3514 {
3515 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3516 {
3517 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3518 pPT->a[i].u = 0;
3519
3520 /* Update the counter as we're removing references. */
3521 Assert(pPage->cPresent);
3522 Assert(pPool->cPresent);
3523 pPage->cPresent--;
3524 pPool->cPresent--;
3525 }
3526 if (!--cPresent)
3527 break;
3528 }
3529 break;
3530 }
3531#endif
3532 }
3533 if (!--cLeft)
3534 break;
3535 }
3536 }
3537
3538 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3539 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3540 return VINF_SUCCESS;
3541}
3542
3543
3544/**
3545 * Clears the user entry in a user table.
3546 *
3547 * This is used to remove all references to a page when flushing it.
3548 */
3549static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3550{
3551 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3552 Assert(pUser->iUser < pPool->cCurPages);
3553 uint32_t iUserTable = pUser->iUserTable;
3554
3555 /*
3556 * Map the user page.
3557 */
3558 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3559 union
3560 {
3561 uint64_t *pau64;
3562 uint32_t *pau32;
3563 } u;
3564 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3565
3566 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3567
3568 /* Safety precaution in case we change the paging for other modes too in the future. */
3569 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3570
3571#ifdef VBOX_STRICT
3572 /*
3573 * Some sanity checks.
3574 */
3575 switch (pUserPage->enmKind)
3576 {
3577 case PGMPOOLKIND_32BIT_PD:
3578 case PGMPOOLKIND_32BIT_PD_PHYS:
3579 Assert(iUserTable < X86_PG_ENTRIES);
3580 break;
3581 case PGMPOOLKIND_PAE_PDPT:
3582 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3583 case PGMPOOLKIND_PAE_PDPT_PHYS:
3584 Assert(iUserTable < 4);
3585 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3586 break;
3587 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3588 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3589 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3590 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3591 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3592 case PGMPOOLKIND_PAE_PD_PHYS:
3593 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3594 break;
3595 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3596 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3597 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3598 break;
3599 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3600 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3601 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3602 break;
3603 case PGMPOOLKIND_64BIT_PML4:
3604 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3605 /* GCPhys >> PAGE_SHIFT is the index here */
3606 break;
3607 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3608 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3609 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3610 break;
3611
3612 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3613 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3614 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3615 break;
3616
3617 case PGMPOOLKIND_ROOT_NESTED:
3618 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3619 break;
3620
3621 default:
3622 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3623 break;
3624 }
3625#endif /* VBOX_STRICT */
3626
3627 /*
3628 * Clear the entry in the user page.
3629 */
3630 switch (pUserPage->enmKind)
3631 {
3632 /* 32-bit entries */
3633 case PGMPOOLKIND_32BIT_PD:
3634 case PGMPOOLKIND_32BIT_PD_PHYS:
3635 ASMAtomicWriteSize(&u.pau32[iUserTable], 0);
3636 break;
3637
3638 /* 64-bit entries */
3639 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3640 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3641 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3642 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3643 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3644#if defined(IN_RC)
3645            /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during cr3 load, so any
3646 * non-present PDPT will continue to cause page faults.
3647 */
3648 ASMReloadCR3();
3649#endif
3650 /* no break */
3651 case PGMPOOLKIND_PAE_PD_PHYS:
3652 case PGMPOOLKIND_PAE_PDPT_PHYS:
3653 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3654 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3655 case PGMPOOLKIND_64BIT_PML4:
3656 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3657 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3658 case PGMPOOLKIND_PAE_PDPT:
3659 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3660 case PGMPOOLKIND_ROOT_NESTED:
3661 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3662 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3663 ASMAtomicWriteSize(&u.pau64[iUserTable], 0);
3664 break;
3665
3666 default:
3667 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3668 }
3669}
3670
3671
3672/**
3673 * Clears all users of a page.
3674 */
3675static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3676{
3677 /*
3678 * Free all the user records.
3679 */
3680 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3681
3682 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3683 uint16_t i = pPage->iUserHead;
3684 while (i != NIL_PGMPOOL_USER_INDEX)
3685 {
3686        /* Clear entry in user table. */
3687 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3688
3689 /* Free it. */
3690 const uint16_t iNext = paUsers[i].iNext;
3691 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3692 paUsers[i].iNext = pPool->iUserFreeHead;
3693 pPool->iUserFreeHead = i;
3694
3695 /* Next. */
3696 i = iNext;
3697 }
3698 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3699}
3700
3701
3702/**
3703 * Allocates a new physical cross reference extent.
3704 *
3705 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3706 * @param pVM The VM handle.
3707 * @param piPhysExt Where to store the phys ext index.
3708 */
3709PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3710{
3711 Assert(PGMIsLockOwner(pVM));
3712 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3713 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3714 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3715 {
3716 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3717 return NULL;
3718 }
3719 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3720 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3721 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3722 *piPhysExt = iPhysExt;
3723 return pPhysExt;
3724}
3725
3726
3727/**
3728 * Frees a physical cross reference extent.
3729 *
3730 * @param pVM The VM handle.
3731 * @param iPhysExt The extent to free.
3732 */
3733void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3734{
3735 Assert(PGMIsLockOwner(pVM));
3736 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3737 Assert(iPhysExt < pPool->cMaxPhysExts);
3738 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3739 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3740 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3741 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3742 pPool->iPhysExtFreeHead = iPhysExt;
3743}
3744
3745
3746/**
3747 * Frees a list of physical cross reference extents.
3748 *
3749 * @param pVM The VM handle.
3750 * @param   iPhysExt    The index of the first extent in the list to free.
3751 */
3752void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3753{
3754 Assert(PGMIsLockOwner(pVM));
3755 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3756
3757 const uint16_t iPhysExtStart = iPhysExt;
3758 PPGMPOOLPHYSEXT pPhysExt;
3759 do
3760 {
3761 Assert(iPhysExt < pPool->cMaxPhysExts);
3762 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3763 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3764 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3765
3766 /* next */
3767 iPhysExt = pPhysExt->iNext;
3768 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3769
3770 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3771 pPool->iPhysExtFreeHead = iPhysExtStart;
3772}
3773
3774
3775/**
3776 * Insert a reference into a list of physical cross reference extents.
3777 *
3778 * @returns The new tracking data for PGMPAGE.
3779 *
3780 * @param pVM The VM handle.
3781 * @param iPhysExt The physical extent index of the list head.
3782 * @param iShwPT The shadow page table index.
3783 *
3784 */
3785static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3786{
3787 Assert(PGMIsLockOwner(pVM));
3788 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3789 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3790
3791 /* special common case. */
3792 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3793 {
3794 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3795 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3796 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3797 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3798 }
3799
3800 /* general treatment. */
3801 const uint16_t iPhysExtStart = iPhysExt;
3802 unsigned cMax = 15;
3803 for (;;)
3804 {
3805 Assert(iPhysExt < pPool->cMaxPhysExts);
3806 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3807 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3808 {
3809 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3810 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3811 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3812 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3813 }
3814 if (!--cMax)
3815 {
3816 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3817 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3818 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3819 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3820        }

            /* Advance to the next extent in the list; when the end is reached, fall out
               of the loop so a new extent can be appended below. */
            iPhysExt = paPhysExts[iPhysExt].iNext;
            if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
                break;
3821    }
3822
3823 /* add another extent to the list. */
3824 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3825 if (!pNew)
3826 {
3827 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3828 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3829 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3830 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3831 }
3832 pNew->iNext = iPhysExtStart;
3833 pNew->aidx[0] = iShwPT;
3834 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3835 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3836}
3837
3838
3839/**
3840 * Add a reference to a guest physical page where extents are in use.
3841 *
3842 * @returns The new tracking data for PGMPAGE.
3843 *
3844 * @param pVM The VM handle.
3845 * @param u16 The ram range flags (top 16-bits).
3846 * @param iShwPT The shadow page table index.
3847 */
3848uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3849{
3850 pgmLock(pVM);
3851 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3852 {
3853 /*
3854 * Convert to extent list.
3855 */
3856 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3857 uint16_t iPhysExt;
3858 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3859 if (pPhysExt)
3860 {
3861 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3862 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3863 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3864 pPhysExt->aidx[1] = iShwPT;
3865 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3866 }
3867 else
3868 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3869 }
3870 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3871 {
3872 /*
3873 * Insert into the extent list.
3874 */
3875 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3876 }
3877 else
3878 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3879 pgmUnlock(pVM);
3880 return u16;
3881}
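
/*
 * Illustrative sketch (not part of the original source): the life cycle of the
 * 16-bit tracking word manipulated above.  The caller shown here is an
 * assumption for illustration; only macros and functions visible in this file
 * are used.
 */
#if 0 /* example only */
    uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
    if (!u16)
        /* First reference: a direct (cRefs, shadow page index) pair. */
        u16 = PGMPOOL_TD_MAKE(1, iShwPT);
    else
        /* Later references: convert to / extend the physical extent list, possibly
           degenerating into the shared PGMPOOL_TD_IDX_OVERFLOWED marker. */
        u16 = pgmPoolTrackPhysExtAddref(pVM, u16, iShwPT);
    PGM_PAGE_SET_TRACKING(pPhysPage, u16);
#endif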
3882
3883
3884/**
3885 * Clear references to guest physical memory.
3886 *
3887 * @param pPool The pool.
3888 * @param pPage The page.
3889 * @param pPhysPage Pointer to the aPages entry in the ram range.
3890 */
3891void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3892{
3893 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3894 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3895
3896 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3897 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3898 {
3899 PVM pVM = pPool->CTX_SUFF(pVM);
3900 pgmLock(pVM);
3901
3902 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3903 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3904 do
3905 {
3906 Assert(iPhysExt < pPool->cMaxPhysExts);
3907
3908 /*
3909 * Look for the shadow page and check if it's all freed.
3910 */
3911 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3912 {
3913 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3914 {
3915 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3916
3917 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3918 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3919 {
3920 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3921 pgmUnlock(pVM);
3922 return;
3923 }
3924
3925 /* we can free the node. */
3926 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3927 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3928 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3929 {
3930 /* lonely node */
3931 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3932 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3933 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3934 }
3935 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3936 {
3937 /* head */
3938 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3939 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3940 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3941 }
3942 else
3943 {
3944 /* in list */
3945 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3946 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3947 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3948 }
3949 iPhysExt = iPhysExtNext;
3950 pgmUnlock(pVM);
3951 return;
3952 }
3953 }
3954
3955 /* next */
3956 iPhysExtPrev = iPhysExt;
3957 iPhysExt = paPhysExts[iPhysExt].iNext;
3958 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3959
3960 pgmUnlock(pVM);
3961 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3962 }
3963 else /* nothing to do */
3964 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3965}
3966
3967
3968/**
3969 * Clear references to guest physical memory.
3970 *
3971 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3972 * is assumed to be correct, so the linear search can be skipped and we can assert
3973 * at an earlier point.
3974 *
3975 * @param pPool The pool.
3976 * @param pPage The page.
3977 * @param HCPhys The host physical address corresponding to the guest page.
3978 * @param GCPhys The guest physical address corresponding to HCPhys.
3979 */
3980static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3981{
3982 /*
3983 * Walk range list.
3984 */
3985 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3986 while (pRam)
3987 {
3988 RTGCPHYS off = GCPhys - pRam->GCPhys;
3989 if (off < pRam->cb)
3990 {
3991 /* does it match? */
3992 const unsigned iPage = off >> PAGE_SHIFT;
3993 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3994#ifdef LOG_ENABLED
3995 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3996 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3997#endif
3998 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3999 {
4000 Assert(pPage->cPresent);
4001 Assert(pPool->cPresent);
4002 pPage->cPresent--;
4003 pPool->cPresent--;
4004 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
4005 return;
4006 }
4007 break;
4008 }
4009 pRam = pRam->CTX_SUFF(pNext);
4010 }
4011 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4012}
4013
4014
4015/**
4016 * Clear references to guest physical memory.
4017 *
4018 * @param pPool The pool.
4019 * @param pPage The page.
4020 * @param HCPhys The host physical address corresponding to the guest page.
4021 * @param   GCPhysHint  The guest physical address which may correspond to HCPhys.
4022 */
4023static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
4024{
4025 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4026
4027 /*
4028 * Walk range list.
4029 */
4030 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4031 while (pRam)
4032 {
4033 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
4034 if (off < pRam->cb)
4035 {
4036 /* does it match? */
4037 const unsigned iPage = off >> PAGE_SHIFT;
4038 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4039 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4040 {
4041 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
4042 return;
4043 }
4044 break;
4045 }
4046 pRam = pRam->CTX_SUFF(pNext);
4047 }
4048
4049 /*
4050 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4051 */
4052 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4053 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4054 while (pRam)
4055 {
4056 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4057 while (iPage-- > 0)
4058 {
4059 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4060 {
4061 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4062 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4063 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
4064 return;
4065 }
4066 }
4067 pRam = pRam->CTX_SUFF(pNext);
4068 }
4069
4070 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
4071}
4072
4073
4074/**
4075 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4076 *
4077 * @param pPool The pool.
4078 * @param pPage The page.
4079 * @param pShwPT The shadow page table (mapping of the page).
4080 * @param pGstPT The guest page table.
4081 */
4082DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4083{
4084 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4085 if (pShwPT->a[i].n.u1Present)
4086 {
4087 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4088 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4089 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
4090 if (!pPage->cPresent)
4091 break;
4092 }
4093}
4094
4095
4096/**
4097 * Clear references to guest physical memory in a PAE / 32-bit page table.
4098 *
4099 * @param pPool The pool.
4100 * @param pPage The page.
4101 * @param pShwPT The shadow page table (mapping of the page).
4102 * @param pGstPT The guest page table (just a half one).
4103 */
4104DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
4105{
4106 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4107 if (pShwPT->a[i].n.u1Present)
4108 {
4109 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4110 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4111 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
4112 if (!pPage->cPresent)
4113 break;
4114 }
4115}
4116
4117
4118/**
4119 * Clear references to guest physical memory in a PAE / PAE page table.
4120 *
4121 * @param pPool The pool.
4122 * @param pPage The page.
4123 * @param pShwPT The shadow page table (mapping of the page).
4124 * @param pGstPT The guest page table.
4125 */
4126DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
4127{
4128 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4129 if (pShwPT->a[i].n.u1Present)
4130 {
4131            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4132 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4133 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
4134 if (!pPage->cPresent)
4135 break;
4136 }
4137}
4138
4139
4140/**
4141 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4142 *
4143 * @param pPool The pool.
4144 * @param pPage The page.
4145 * @param pShwPT The shadow page table (mapping of the page).
4146 */
4147DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4148{
4149 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4150 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4151 if (pShwPT->a[i].n.u1Present)
4152 {
4153 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4154 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4155 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
4156 if (!pPage->cPresent)
4157 break;
4158 }
4159}
4160
4161
4162/**
4163 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4164 *
4165 * @param pPool The pool.
4166 * @param pPage The page.
4167 * @param pShwPT The shadow page table (mapping of the page).
4168 */
4169DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
4170{
4171 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4172 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4173 if (pShwPT->a[i].n.u1Present)
4174 {
4175 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4176 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
4177 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
4178 if (!pPage->cPresent)
4179 break;
4180 }
4181}
4182
4183
4184/**
4185 * Clear references to shadowed pages in an EPT page table.
4186 *
4187 * @param pPool The pool.
4188 * @param pPage The page.
4189 * @param   pShwPT      The shadow page table (mapping of the page).
4190 */
4191DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4192{
4193 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4194 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4195 if (pShwPT->a[i].n.u1Present)
4196 {
4197 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4198 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4199 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
4200 if (!pPage->cPresent)
4201 break;
4202 }
4203}
4204
4205
4206
4207/**
4208 * Clear references to shadowed pages in a 32-bit page directory.
4209 *
4210 * @param pPool The pool.
4211 * @param pPage The page.
4212 * @param pShwPD The shadow page directory (mapping of the page).
4213 */
4214DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4215{
4216 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4217 {
4218 if ( pShwPD->a[i].n.u1Present
4219 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4220 )
4221 {
4222 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4223 if (pSubPage)
4224 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4225 else
4226 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4227 }
4228 }
4229}
4230
4231/**
4232 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4233 *
4234 * @param pPool The pool.
4235 * @param pPage The page.
4236 * @param pShwPD The shadow page directory (mapping of the page).
4237 */
4238DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4239{
4240 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4241 {
4242 if ( pShwPD->a[i].n.u1Present
4243 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4244 )
4245 {
4246#ifdef PGM_WITH_LARGE_PAGES
4247 if (pShwPD->a[i].b.u1Size)
4248 {
4249 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4250 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4251 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */);
4252 }
4253 else
4254#endif
4255 {
4256 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4257 if (pSubPage)
4258 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4259 else
4260 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4261 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4262 }
4263 }
4264 }
4265}
4266
4267/**
4268 * Clear references to shadowed pages in a PAE page directory pointer table.
4269 *
4270 * @param pPool The pool.
4271 * @param pPage The page.
4272 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4273 */
4274DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4275{
4276 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4277 {
4278 if ( pShwPDPT->a[i].n.u1Present
4279 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4280 )
4281 {
4282 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4283 if (pSubPage)
4284 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4285 else
4286 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4287 }
4288 }
4289}
4290
4291
4292/**
4293 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4294 *
4295 * @param pPool The pool.
4296 * @param pPage The page.
4297 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4298 */
4299DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4300{
4301 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4302 {
4303 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4304 if (pShwPDPT->a[i].n.u1Present)
4305 {
4306 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4307 if (pSubPage)
4308 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4309 else
4310 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4311 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4312 }
4313 }
4314}
4315
4316
4317/**
4318 * Clear references to shadowed pages in a 64-bit level 4 page table.
4319 *
4320 * @param pPool The pool.
4321 * @param pPage The page.
4322 * @param   pShwPML4    The shadow PML4 table (mapping of the page).
4323 */
4324DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4325{
4326 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4327 {
4328 if (pShwPML4->a[i].n.u1Present)
4329 {
4330 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4331 if (pSubPage)
4332 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4333 else
4334 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4335 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4336 }
4337 }
4338}
4339
4340
4341/**
4342 * Clear references to shadowed pages in an EPT page directory.
4343 *
4344 * @param pPool The pool.
4345 * @param pPage The page.
4346 * @param pShwPD The shadow page directory (mapping of the page).
4347 */
4348DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4349{
4350 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4351 {
4352 if (pShwPD->a[i].n.u1Present)
4353 {
4354#ifdef PGM_WITH_LARGE_PAGES
4355 if (pShwPD->a[i].b.u1Size)
4356 {
4357 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4358 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4359 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */);
4360 }
4361 else
4362#endif
4363 {
4364 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4365 if (pSubPage)
4366 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4367 else
4368 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4369 }
4370 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4371 }
4372 }
4373}
4374
4375
4376/**
4377 * Clear references to shadowed pages in an EPT page directory pointer table.
4378 *
4379 * @param pPool The pool.
4380 * @param pPage The page.
4381 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4382 */
4383DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4384{
4385 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4386 {
4387 if (pShwPDPT->a[i].n.u1Present)
4388 {
4389 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4390 if (pSubPage)
4391 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4392 else
4393 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4394 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4395 }
4396 }
4397}
4398
4399
4400/**
4401 * Clears all references made by this page.
4402 *
4403 * This includes other shadow pages and GC physical addresses.
4404 *
4405 * @param pPool The pool.
4406 * @param pPage The page.
4407 */
4408static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4409{
4410 /*
4411 * Map the shadow page and take action according to the page kind.
4412 */
4413 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
4414 switch (pPage->enmKind)
4415 {
4416 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4417 {
4418 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4419 void *pvGst;
4420 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4421 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4422 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4423 break;
4424 }
4425
4426 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4427 {
4428 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4429 void *pvGst;
4430 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4431 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4432 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4433 break;
4434 }
4435
4436 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4437 {
4438 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4439 void *pvGst;
4440 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4441 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4442 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4443 break;
4444 }
4445
4446 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4447 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4448 {
4449 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4450 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4451 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4452 break;
4453 }
4454
4455 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4456 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4457 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4458 {
4459 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4460 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4461 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4462 break;
4463 }
4464
4465 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4466 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4467 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4468 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4469 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4470 case PGMPOOLKIND_PAE_PD_PHYS:
4471 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4472 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4473 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4474 break;
4475
4476 case PGMPOOLKIND_32BIT_PD_PHYS:
4477 case PGMPOOLKIND_32BIT_PD:
4478 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4479 break;
4480
4481 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4482 case PGMPOOLKIND_PAE_PDPT:
4483 case PGMPOOLKIND_PAE_PDPT_PHYS:
4484 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4485 break;
4486
4487 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4488 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4489 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4490 break;
4491
4492 case PGMPOOLKIND_64BIT_PML4:
4493 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4494 break;
4495
4496 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4497 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4498 break;
4499
4500 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4501 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4502 break;
4503
4504 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4505 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4506 break;
4507
4508 default:
4509 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4510 }
4511
4512    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4513 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4514 ASMMemZeroPage(pvShw);
4515 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4516 pPage->fZeroed = true;
4517 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4518}
4519
4520/**
4521 * Flushes a pool page.
4522 *
4523 * This moves the page to the free list after removing all user references to it.
4524 *
4525 * @returns VBox status code.
4526 * @retval VINF_SUCCESS on success.
4527 * @param pPool The pool.
4528 * @param   pPage       The shadow page.
4529 * @param   fFlush      Flush the TLBs when required (should only be false in very specific use cases!!)
4530 */
4531int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4532{
4533 PVM pVM = pPool->CTX_SUFF(pVM);
4534 bool fFlushRequired = false;
4535
4536 int rc = VINF_SUCCESS;
4537 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4538 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4539 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4540
4541 /*
4542 * Quietly reject any attempts at flushing any of the special root pages.
4543 */
4544 if (pPage->idx < PGMPOOL_IDX_FIRST)
4545 {
4546 AssertFailed(); /* can no longer happen */
4547 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4548 return VINF_SUCCESS;
4549 }
4550
4551 pgmLock(pVM);
4552
4553 /*
4554 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4555 */
4556 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4557 {
4558 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4559 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4560 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4561 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4562 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4563 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4564 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4565 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4566 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4567 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4568 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4569 pgmUnlock(pVM);
4570 return VINF_SUCCESS;
4571 }
4572
4573#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4574 /* Start a subset so we won't run out of mapping space. */
4575 PVMCPU pVCpu = VMMGetCpu(pVM);
4576 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4577#endif
4578
4579 /*
4580 * Mark the page as being in need of an ASMMemZeroPage().
4581 */
4582 pPage->fZeroed = false;
4583
4584#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4585 if (pPage->fDirty)
4586 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4587#endif
4588
4589 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4590 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4591 fFlushRequired = true;
4592
4593 /*
4594 * Clear the page.
4595 */
4596 pgmPoolTrackClearPageUsers(pPool, pPage);
4597 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4598 pgmPoolTrackDeref(pPool, pPage);
4599 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4600
4601 /*
4602 * Flush it from the cache.
4603 */
4604 pgmPoolCacheFlushPage(pPool, pPage);
4605
4606#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4607 /* Heavy stuff done. */
4608 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4609#endif
4610
4611 /*
4612     * Deregister the monitoring.
4613 */
4614 if (pPage->fMonitored)
4615 rc = pgmPoolMonitorFlush(pPool, pPage);
4616
4617 /*
4618 * Free the page.
4619 */
4620 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4621 pPage->iNext = pPool->iFreeHead;
4622 pPool->iFreeHead = pPage->idx;
4623 pPage->enmKind = PGMPOOLKIND_FREE;
4624 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4625 pPage->GCPhys = NIL_RTGCPHYS;
4626 pPage->fReusedFlushPending = false;
4627
4628 pPool->cUsedPages--;
4629
4630 /* Flush the TLBs of all VCPUs if required. */
4631 if ( fFlushRequired
4632 && fFlush)
4633 {
4634 PGM_INVL_ALL_VCPU_TLBS(pVM);
4635 }
4636
4637 pgmUnlock(pVM);
4638 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4639 return rc;
4640}
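
/*
 * Illustrative sketch (not part of the original source): how pgmPoolFlushPage is
 * typically used.  The batch case is an assumption based on the fFlush parameter
 * description above; papToFlush and cToFlush are hypothetical caller-side
 * variables.
 */
#if 0 /* example only */
    /* Common case: flush one page and let the function do the TLB shootdown. */
    int rc = pgmPoolFlushPage(pPool, pPage, true /*fFlush*/);

    /* Batch case: suppress the per-page shootdown and do a single one afterwards. */
    for (unsigned i = 0; i < cToFlush; i++)
        pgmPoolFlushPage(pPool, papToFlush[i], false /*fFlush*/);
    PGM_INVL_ALL_VCPU_TLBS(pVM);
#endif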
4641
4642
4643/**
4644 * Frees a usage of a pool page.
4645 *
4646 * The caller is responsible for updating the user table so that it no longer
4647 * references the shadow page.
4648 *
4649 * @param pPool The pool.
4650 * @param   pPage       The shadow page.
4651 * @param iUser The shadow page pool index of the user table.
4652 * @param iUserTable The index into the user table (shadowed).
4653 */
4654void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4655{
4656 PVM pVM = pPool->CTX_SUFF(pVM);
4657
4658 STAM_PROFILE_START(&pPool->StatFree, a);
4659 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4660 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4661 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4662 pgmLock(pVM);
4663 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4664 if (!pPage->fCached)
4665 pgmPoolFlushPage(pPool, pPage);
4666 pgmUnlock(pVM);
4667 STAM_PROFILE_STOP(&pPool->StatFree, a);
4668}
4669
4670
4671/**
4672 * Makes one or more pages free, either by growing the pool or by freeing a cached page.
4673 *
4674 * @returns VBox status code.
4675 * @retval VINF_SUCCESS on success.
4676 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4677 *
4678 * @param pPool The pool.
4679 * @param enmKind Page table kind
4680 * @param iUser The user of the page.
4681 */
4682static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4683{
4684 PVM pVM = pPool->CTX_SUFF(pVM);
4685
4686 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4687
4688 /*
4689     * If the pool isn't fully grown yet, expand it.
4690 */
4691 if ( pPool->cCurPages < pPool->cMaxPages
4692#if defined(IN_RC)
4693 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4694 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4695 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4696#endif
4697 )
4698 {
4699 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4700#ifdef IN_RING3
4701 int rc = PGMR3PoolGrow(pVM);
4702#else
4703 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4704#endif
4705 if (RT_FAILURE(rc))
4706 return rc;
4707 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4708 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4709 return VINF_SUCCESS;
4710 }
4711
4712 /*
4713 * Free one cached page.
4714 */
4715 return pgmPoolCacheFreeOne(pPool, iUser);
4716}
4717

4718/**
4719 * Allocates a page from the pool.
4720 *
4721 * This page may actually be a cached page and not in need of any processing
4722 * on the caller's part.
4723 *
4724 * @returns VBox status code.
4725 * @retval VINF_SUCCESS if a NEW page was allocated.
4726 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4727 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4728 * @param pVM The VM handle.
4729 * @param GCPhys The GC physical address of the page we're gonna shadow.
4730 * For 4MB and 2MB PD entries, it's the first address the
4731 * shadow PT is covering.
4732 * @param enmKind The kind of mapping.
4733 * @param enmAccess Access type for the mapping (only relevant for big pages)
4734 * @param iUser The shadow page pool index of the user table.
4735 * @param iUserTable The index into the user table (shadowed).
4736 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4737 * @param fLockPage Lock the page
4738 */
4739int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4740{
4741 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4742 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4743 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4744 *ppPage = NULL;
4745 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4746 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4747 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4748
4749 pgmLock(pVM);
4750
4751 if (pPool->fCacheEnabled)
4752 {
4753 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4754 if (RT_SUCCESS(rc2))
4755 {
4756 if (fLockPage)
4757 pgmPoolLockPage(pPool, *ppPage);
4758 pgmUnlock(pVM);
4759 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4760 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4761 return rc2;
4762 }
4763 }
4764
4765 /*
4766 * Allocate a new one.
4767 */
4768 int rc = VINF_SUCCESS;
4769 uint16_t iNew = pPool->iFreeHead;
4770 if (iNew == NIL_PGMPOOL_IDX)
4771 {
4772 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4773 if (RT_FAILURE(rc))
4774 {
4775 pgmUnlock(pVM);
4776 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4777 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4778 return rc;
4779 }
4780 iNew = pPool->iFreeHead;
4781 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4782 }
4783
4784 /* unlink the free head */
4785 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4786 pPool->iFreeHead = pPage->iNext;
4787 pPage->iNext = NIL_PGMPOOL_IDX;
4788
4789 /*
4790 * Initialize it.
4791 */
4792 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4793 pPage->enmKind = enmKind;
4794 pPage->enmAccess = enmAccess;
4795 pPage->GCPhys = GCPhys;
4796 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4797 pPage->fMonitored = false;
4798 pPage->fCached = false;
4799#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4800 pPage->fDirty = false;
4801#endif
4802 pPage->fReusedFlushPending = false;
4803 pPage->cModifications = 0;
4804 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4805 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4806 pPage->cPresent = 0;
4807 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4808 pPage->pvLastAccessHandlerFault = 0;
4809 pPage->cLastAccessHandlerCount = 0;
4810 pPage->pvLastAccessHandlerRip = 0;
4811
4812 /*
4813 * Insert into the tracking and cache. If this fails, free the page.
4814 */
4815 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4816 if (RT_FAILURE(rc3))
4817 {
4818 pPool->cUsedPages--;
4819 pPage->enmKind = PGMPOOLKIND_FREE;
4820 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4821 pPage->GCPhys = NIL_RTGCPHYS;
4822 pPage->iNext = pPool->iFreeHead;
4823 pPool->iFreeHead = pPage->idx;
4824 pgmUnlock(pVM);
4825 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4826 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4827 return rc3;
4828 }
4829
4830 /*
4831 * Commit the allocation, clear the page and return.
4832 */
4833#ifdef VBOX_WITH_STATISTICS
4834 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4835 pPool->cUsedPagesHigh = pPool->cUsedPages;
4836#endif
4837
4838 if (!pPage->fZeroed)
4839 {
4840 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4841 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4842 ASMMemZeroPage(pv);
4843 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4844 }
4845
4846 *ppPage = pPage;
4847 if (fLockPage)
4848 pgmPoolLockPage(pPool, pPage);
4849 pgmUnlock(pVM);
4850 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4851 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4852 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4853 return rc;
4854}
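
/*
 * Illustrative sketch (not part of the original source): allocating a shadow PAE
 * page table for a guest page table and releasing it again.  GCPhysGstPT,
 * pUserPage and iUserTable are placeholders for the caller's context; the pool
 * kind chosen here is just one of the kinds handled above.
 */
#if 0 /* example only */
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAllocEx(pVM, GCPhysGstPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
                            pUserPage->idx, iUserTable, &pShwPage, false /*fLockPage*/);
    if (RT_SUCCESS(rc))     /* VINF_SUCCESS for a new page, VINF_PGM_CACHED_PAGE for a cached one */
    {
        /* ... point the parent (user) table entry at pShwPage->Core.Key ... */

        /* When the parent entry is torn down again: */
        pgmPoolFreeByPage(pPool, pShwPage, pUserPage->idx, iUserTable);
    }
#endif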
4855
4856
4857/**
4858 * Frees a usage of a pool page.
4859 *
4860 * @param pVM The VM handle.
4861 * @param HCPhys The HC physical address of the shadow page.
4862 * @param iUser The shadow page pool index of the user table.
4863 * @param iUserTable The index into the user table (shadowed).
4864 */
4865void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4866{
4867 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4868 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4869 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4870}
4871
4872/**
4873 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4874 *
4875 * @returns Pointer to the shadow page structure.
4876 * @param pPool The pool.
4877 * @param HCPhys The HC physical address of the shadow page.
4878 */
4879PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4880{
4881 PVM pVM = pPool->CTX_SUFF(pVM);
4882
4883 Assert(PGMIsLockOwner(pVM));
4884
4885 /*
4886 * Look up the page.
4887 */
4888 pgmLock(pVM);
4889 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4890 pgmUnlock(pVM);
4891
4892 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4893 return pPage;
4894}
4895
4896#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4897/**
4898 * Flush the specified page if present
4899 *
4900 * @param pVM The VM handle.
4901 * @param GCPhys Guest physical address of the page to flush
4902 */
4903void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4904{
4905 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4906
4907 VM_ASSERT_EMT(pVM);
4908
4909 /*
4910 * Look up the GCPhys in the hash.
4911 */
4912 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4913 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4914 if (i == NIL_PGMPOOL_IDX)
4915 return;
4916
4917 do
4918 {
4919 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4920 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4921 {
4922 switch (pPage->enmKind)
4923 {
4924 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4925 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4926 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4927 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4928 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4929 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4930 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4931 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4932 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4933 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4934 case PGMPOOLKIND_64BIT_PML4:
4935 case PGMPOOLKIND_32BIT_PD:
4936 case PGMPOOLKIND_PAE_PDPT:
4937 {
4938 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4939#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4940 if (pPage->fDirty)
4941 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4942 else
4943#endif
4944 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4945 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4946 pgmPoolMonitorChainFlush(pPool, pPage);
4947 return;
4948 }
4949
4950 /* ignore, no monitoring. */
4951 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4952 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4953 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4954 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4955 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4956 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4957 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4958 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4959 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4960 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4961 case PGMPOOLKIND_ROOT_NESTED:
4962 case PGMPOOLKIND_PAE_PD_PHYS:
4963 case PGMPOOLKIND_PAE_PDPT_PHYS:
4964 case PGMPOOLKIND_32BIT_PD_PHYS:
4965 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4966 break;
4967
4968 default:
4969 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4970 }
4971 }
4972
4973 /* next */
4974 i = pPage->iNext;
4975 } while (i != NIL_PGMPOOL_IDX);
4976 return;
4977}
4978#endif /* IN_RING3 */
4979
4980#ifdef IN_RING3
4981
4982
4983/**
4984 * Resets the CPU on hot plugging.
4985 *
4986 * @param pVM The VM handle.
4987 * @param pVCpu The virtual CPU.
4988 */
4989void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
4990{
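    /* Leave and re-enter the shadow paging mode so the CPU drops and then re-establishes its references to the pool's root pages. */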
4991 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4992
4993 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
4994 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4995 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4996}
4997
4998
4999/**
5000 * Flushes the entire cache.
5001 *
5002 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5003 * this and will execute the CR3 flush.
5004 *
5005 * @param pVM The VM handle.
5006 */
5007void pgmR3PoolReset(PVM pVM)
5008{
5009 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5010
5011 Assert(PGMIsLockOwner(pVM));
5012 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5013 LogFlow(("pgmR3PoolReset:\n"));
5014
5015 /*
5016 * If there are no pages in the pool, there is nothing to do.
5017 */
5018 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5019 {
5020 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5021 return;
5022 }
5023
5024 /*
5025 * Exit the shadow mode since we're going to clear everything,
5026 * including the root page.
5027 */
5028 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5029 {
5030 PVMCPU pVCpu = &pVM->aCpus[i];
5031 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5032 }
5033
5034 /*
5035 * Nuke the free list and reinsert all pages into it.
5036 */
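    /* Reset every ordinary page (idx >= PGMPOOL_IDX_FIRST) to the FREE state and chain it into the free list by index. */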
5037 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5038 {
5039 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5040
5041 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5042 if (pPage->fMonitored)
5043 pgmPoolMonitorFlush(pPool, pPage);
5044 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5045 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5046 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5047 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5048 pPage->cModifications = 0;
5049 pPage->GCPhys = NIL_RTGCPHYS;
5050 pPage->enmKind = PGMPOOLKIND_FREE;
5051 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5052 Assert(pPage->idx == i);
5053 pPage->iNext = i + 1;
5054 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5055 pPage->fSeenNonGlobal = false;
5056 pPage->fMonitored = false;
5057#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5058 pPage->fDirty = false;
5059#endif
5060 pPage->fCached = false;
5061 pPage->fReusedFlushPending = false;
5062 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5063 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5064 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5065 pPage->cLocked = 0;
5066 }
5067 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5068 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5069 pPool->cUsedPages = 0;
5070
5071 /*
5072 * Zap and reinitialize the user records.
5073 */
5074 pPool->cPresent = 0;
5075 pPool->iUserFreeHead = 0;
5076 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5077 const unsigned cMaxUsers = pPool->cMaxUsers;
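    /* Chain every user record onto the free list and reset iUser/iUserTable to invalid values. */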
5078 for (unsigned i = 0; i < cMaxUsers; i++)
5079 {
5080 paUsers[i].iNext = i + 1;
5081 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5082 paUsers[i].iUserTable = 0xfffffffe;
5083 }
5084 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5085
5086 /*
5087 * Clear all the GCPhys links and rebuild the phys ext free list.
5088 */
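    /* Clear the shadow page reference tracking data of every RAM page; after the reset no pool page references them. */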
5089 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
5090 pRam;
5091 pRam = pRam->CTX_SUFF(pNext))
5092 {
5093 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5094 while (iPage-- > 0)
5095 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
5096 }
5097
5098 pPool->iPhysExtFreeHead = 0;
5099 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5100 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
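    /* Chain all physical extent records into the free list and clear their pool page indexes. */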
5101 for (unsigned i = 0; i < cMaxPhysExts; i++)
5102 {
5103 paPhysExts[i].iNext = i + 1;
5104 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5105 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5106 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5107 }
5108 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5109
5110 /*
5111 * Just zap the modified list.
5112 */
5113 pPool->cModifiedPages = 0;
5114 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5115
5116 /*
5117 * Clear the GCPhys hash and the age list.
5118 */
5119 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5120 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5121 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5122 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5123
5124#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5125 /* Clear all dirty pages. */
5126 pPool->idxFreeDirtyPage = 0;
5127 pPool->cDirtyPages = 0;
5128 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
5129 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
5130#endif
5131
5132 /*
5133 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5134 */
5135 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5136 {
5137 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5138 pPage->iNext = NIL_PGMPOOL_IDX;
5139 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5140 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5141 pPage->cModifications = 0;
5142 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5143 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5144 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
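        /* Monitored special pages keep their physical access handler; refresh its callbacks and reinsert the page into the GCPhys hash. */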
5145 if (pPage->fMonitored)
5146 {
5147 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
5148 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5149 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5150 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5151 pPool->pszAccessHandler);
5152 AssertFatalRCSuccess(rc);
5153 pgmPoolHashInsert(pPool, pPage);
5154 }
5155 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5156 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5157 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5158 }
5159
5160 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5161 {
5162 /*
5163 * Re-enter the shadowing mode and assert Sync CR3 FF.
5164 */
5165 PVMCPU pVCpu = &pVM->aCpus[i];
5166 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5167 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5168 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5169 }
5170
5171 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5172}
5173#endif /* IN_RING3 */
5174
5175#ifdef LOG_ENABLED
5176static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5177{
5178 switch(enmKind)
5179 {
5180 case PGMPOOLKIND_INVALID:
5181 return "PGMPOOLKIND_INVALID";
5182 case PGMPOOLKIND_FREE:
5183 return "PGMPOOLKIND_FREE";
5184 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5185 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5186 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5187 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5188 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5189 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5190 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5191 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5192 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5193 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5194 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5195 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5196 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5197 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5198 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5199 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5200 case PGMPOOLKIND_32BIT_PD:
5201 return "PGMPOOLKIND_32BIT_PD";
5202 case PGMPOOLKIND_32BIT_PD_PHYS:
5203 return "PGMPOOLKIND_32BIT_PD_PHYS";
5204 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5205 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5206 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5207 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5208 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5209 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5210 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5211 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5212 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5213 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5214 case PGMPOOLKIND_PAE_PD_PHYS:
5215 return "PGMPOOLKIND_PAE_PD_PHYS";
5216 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5217 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5218 case PGMPOOLKIND_PAE_PDPT:
5219 return "PGMPOOLKIND_PAE_PDPT";
5220 case PGMPOOLKIND_PAE_PDPT_PHYS:
5221 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5222 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5223 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5224 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5225 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5226 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5227 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5228 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5229 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5230 case PGMPOOLKIND_64BIT_PML4:
5231 return "PGMPOOLKIND_64BIT_PML4";
5232 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5233 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5234 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5235 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5236 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5237 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5238 case PGMPOOLKIND_ROOT_NESTED:
5239 return "PGMPOOLKIND_ROOT_NESTED";
5240 }
5241 return "Unknown kind!";
5242}
5243#endif /* LOG_ENABLED */