VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@101505

Last change on this file since 101505 was 100964, checked in by vboxsync, 16 months ago

VMM/PGM: Some experiments wrt preserving ZERO page status as the (Windows) guest zeros all memory prior to use. bugref:6385 bugref:10509

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 225.2 KB
1/* $Id: PGMAllPool.cpp 100964 2023-08-24 14:45:42Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PGM_POOL
33#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
34#include <VBox/vmm/pgm.h>
35#include <VBox/vmm/mm.h>
36#include <VBox/vmm/em.h>
37#include <VBox/vmm/cpum.h>
38#include "PGMInternal.h"
39#include <VBox/vmm/vmcc.h>
40#include "PGMInline.h"
41#include <VBox/vmm/hm_vmx.h>
42
43#include <VBox/log.h>
44#include <VBox/err.h>
45#include <iprt/asm.h>
46#include <iprt/string.h>
47
48
49/*********************************************************************************************************************************
50* Internal Functions *
51*********************************************************************************************************************************/
52RT_C_DECLS_BEGIN
53#if 0 /* unused */
54DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
55DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
56#endif /* unused */
57static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
58static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
59static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
60static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
61#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
62static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
63#endif
64#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
65static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
66#endif
67
68int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
69PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
70void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
71void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);
72
73RT_C_DECLS_END
74
75
76#if 0 /* unused */
77/**
78 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
79 *
80 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
81 * @param enmKind The page kind.
82 */
83DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
84{
85 switch (enmKind)
86 {
87 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
88 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
89 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
90 return true;
91 default:
92 return false;
93 }
94}
95#endif /* unused */
96
97
98/**
99 * Flushes a chain of pages sharing the same access monitor.
100 *
101 * @param pPool The pool.
102 * @param pPage A page in the chain.
103 */
104void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
105{
106 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
107
108 /*
109 * Find the list head.
110 */
111 uint16_t idx = pPage->idx;
112 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
113 {
114 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
115 {
116 idx = pPage->iMonitoredPrev;
117 Assert(idx != pPage->idx);
118 pPage = &pPool->aPages[idx];
119 }
120 }
121
122 /*
123 * Iterate the list flushing each shadow page.
124 */
125 for (;;)
126 {
127 idx = pPage->iMonitoredNext;
128 Assert(idx != pPage->idx);
129 if (pPage->idx >= PGMPOOL_IDX_FIRST)
130 {
131 int rc2 = pgmPoolFlushPage(pPool, pPage);
132 AssertRC(rc2);
133 }
134 /* next */
135 if (idx == NIL_PGMPOOL_IDX)
136 break;
137 pPage = &pPool->aPages[idx];
138 }
139}
140
141
142/**
143 * Wrapper for getting the current context pointer to the entry being modified.
144 *
145 * @returns VBox status code suitable for scheduling.
146 * @param pVM The cross context VM structure.
147 * @param pvDst Destination address
148 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
149 * on the context (e.g. \#PF in R0 & RC).
150 * @param GCPhysSrc The source guest physical address.
151 * @param cb Size of data to read
152 */
153DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
154{
155#if defined(IN_RING3)
156 NOREF(pVM); NOREF(GCPhysSrc);
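 /* In ring-3 pvSrc is a valid mapping of the guest entry; masking off the low bits
    below aligns the read down to the start of the (power-of-two sized) entry, so the
    whole entry is copied even when the faulting address points into the middle of it. */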
157 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
158 return VINF_SUCCESS;
159#else
160 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
161 NOREF(pvSrc);
162 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
163#endif
164}
165
166
167/**
168 * Process shadow entries before they are changed by the guest.
169 *
170 * For PT entries we will clear them. For PD entries, we'll simply check
171 * for mapping conflicts and set the SyncCR3 FF if found.
172 *
173 * @param pVCpu The cross context virtual CPU structure.
174 * @param pPool The pool.
175 * @param pPage The head page.
176 * @param GCPhysFault The guest physical fault address.
177 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
178 * depending on the context (e.g. \#PF in R0 & RC).
179 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
180 */
181static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
182 void const *pvAddress, unsigned cbWrite)
183{
184 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
185 const unsigned off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
186 PVMCC pVM = pPool->CTX_SUFF(pVM);
187 NOREF(pVCpu);
188
189 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
190 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
191
192 if (PGMPOOL_PAGE_IS_NESTED(pPage))
193 Log7Func(("%RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
194
195 for (;;)
196 {
197 union
198 {
199 void *pv;
200 PX86PT pPT;
201 PPGMSHWPTPAE pPTPae;
202 PX86PD pPD;
203 PX86PDPAE pPDPae;
204 PX86PDPT pPDPT;
205 PX86PML4 pPML4;
206#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
207 PEPTPDPT pEptPdpt;
208 PEPTPD pEptPd;
209 PEPTPT pEptPt;
210#endif
211 } uShw;
212
213 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
214 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
215
216 uShw.pv = NULL;
217 switch (pPage->enmKind)
218 {
219 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
220 {
221 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
222 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
223 const unsigned iShw = off / sizeof(X86PTE);
224 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
225 X86PGUINT const uPde = uShw.pPT->a[iShw].u;
226 if (uPde & X86_PTE_P)
227 {
228 X86PTE GstPte;
229 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
230 AssertRC(rc);
231 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
232 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
233 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
234 }
235 break;
236 }
237
238 /* page/2 sized */
239 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
240 {
241 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
242 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
243 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
244 {
245 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
246 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
247 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
248 {
249 X86PTE GstPte;
250 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
251 AssertRC(rc);
252
253 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
254 pgmPoolTracDerefGCPhysHint(pPool, pPage,
255 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
256 GstPte.u & X86_PTE_PG_MASK,
257 iShw);
258 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
259 }
260 }
261 break;
262 }
263
264 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
265 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
266 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
267 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
268 {
269 unsigned iGst = off / sizeof(X86PDE);
270 unsigned iShwPdpt = iGst / 256;
271 unsigned iShw = (iGst % 256) * 2;
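 /* A 32-bit guest PD has 1024 entries covering 4 GB, while each shadow PAE PD covers
    1 GB with 512 entries: guest entry iGst therefore belongs to PAE PD iGst / 256, and
    each 4 MB guest PDE is shadowed by a pair of 2 MB PAE PDEs, hence the iShw times 2. */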
272 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
273
274 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
275 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
276 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
277 {
278 for (unsigned i = 0; i < 2; i++)
279 {
280 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
281 if (uPde & X86_PDE_P)
282 {
283 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
284 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
285 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
286 }
287
288 /* paranoia / a bit assumptive. */
289 if ( (off & 3)
290 && (off & 3) + cbWrite > 4)
291 {
292 const unsigned iShw2 = iShw + 2 + i;
293 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
294 {
295 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
296 if (uPde2 & X86_PDE_P)
297 {
298 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
299 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
300 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
301 }
302 }
303 }
304 }
305 }
306 break;
307 }
308
309 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
310 {
311 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
312 const unsigned iShw = off / sizeof(X86PTEPAE);
313 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
314 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
315 {
316 X86PTEPAE GstPte;
317 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
318 AssertRC(rc);
319
320 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
321 pgmPoolTracDerefGCPhysHint(pPool, pPage,
322 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
323 GstPte.u & X86_PTE_PAE_PG_MASK,
324 iShw);
325 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
326 }
327
328 /* paranoia / a bit assumptive. */
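 /* A misaligned write can spill into the following 8-byte entry; if it does, the entry
    containing the last written byte is checked and cleared as well. */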
329 if ( (off & 7)
330 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
331 {
332 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
333 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
334
335 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
336 {
337 X86PTEPAE GstPte;
338 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
339 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
340 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
341 AssertRC(rc);
342 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
343 pgmPoolTracDerefGCPhysHint(pPool, pPage,
344 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
345 GstPte.u & X86_PTE_PAE_PG_MASK,
346 iShw2);
347 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
348 }
349 }
350 break;
351 }
352
353 case PGMPOOLKIND_32BIT_PD:
354 {
355 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
356 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
357
358 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
359 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
360 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
361 if (uPde & X86_PDE_P)
362 {
363 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
364 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
365 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
366 }
367
368 /* paranoia / a bit assumptive. */
369 if ( (off & 3)
370 && (off & 3) + cbWrite > sizeof(X86PTE))
371 {
372 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
373 if ( iShw2 != iShw
374 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
375 {
376 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
377 if (uPde2 & X86_PDE_P)
378 {
379 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
380 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
381 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
382 }
383 }
384 }
385#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
386 if ( uShw.pPD->a[iShw].n.u1Present
387 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
388 {
389 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
390 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
391 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
392 }
393#endif
394 break;
395 }
396
397 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
398 {
399 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
400 const unsigned iShw = off / sizeof(X86PDEPAE);
401 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
402
403 /*
404 * Causes trouble when the guest uses a PDE to refer to the whole page table level
405 * structure. (Invalidate here; faults later on when it tries to change the page
406 * table entries -> recheck; probably only applies to the RC case.)
407 */
408 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
409 if (uPde & X86_PDE_P)
410 {
411 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
412 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
413 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
414 }
415
416 /* paranoia / a bit assumptive. */
417 if ( (off & 7)
418 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
419 {
420 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
421 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
422
423 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
424 if (uPde2 & X86_PDE_P)
425 {
426 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
427 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
428 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
429 }
430 }
431 break;
432 }
433
434 case PGMPOOLKIND_PAE_PDPT:
435 {
436 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
437 /*
438 * Hopefully this doesn't happen very often:
439 * - touching unused parts of the page
440 * - messing with the bits of pd pointers without changing the physical address
441 */
442 /* PDPT roots are not page aligned; 32 byte only! */
443 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
444
445 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
446 const unsigned iShw = offPdpt / sizeof(X86PDPE);
447 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
448 {
449 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
450 if (uPdpe & X86_PDPE_P)
451 {
452 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
453 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
454 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
455 }
456
457 /* paranoia / a bit assumptive. */
458 if ( (offPdpt & 7)
459 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
460 {
461 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
462 if ( iShw2 != iShw
463 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
464 {
465 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
466 if (uPdpe2 & X86_PDPE_P)
467 {
468 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
469 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
470 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
471 }
472 }
473 }
474 }
475 break;
476 }
477
478 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
479 {
480 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
481 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
482 const unsigned iShw = off / sizeof(X86PDEPAE);
483 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
484 if (uPde & X86_PDE_P)
485 {
486 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
487 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
488 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
489 }
490
491 /* paranoia / a bit assumptive. */
492 if ( (off & 7)
493 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
494 {
495 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
496 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
497 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
498 if (uPde2 & X86_PDE_P)
499 {
500 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
501 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
502 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
503 }
504 }
505 break;
506 }
507
508 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
509 {
510 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
511 /*
512 * Hopefully this doesn't happen very often:
513 * - messing with the bits of pd pointers without changing the physical address
514 */
515 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
516 const unsigned iShw = off / sizeof(X86PDPE);
517 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
518 if (uPdpe & X86_PDPE_P)
519 {
520 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
521 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
522 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
523 }
524 /* paranoia / a bit assumptive. */
525 if ( (off & 7)
526 && (off & 7) + cbWrite > sizeof(X86PDPE))
527 {
528 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
529 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
530 if (uPdpe2 & X86_PDPE_P)
531 {
532 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
533 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
534 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
535 }
536 }
537 break;
538 }
539
540 case PGMPOOLKIND_64BIT_PML4:
541 {
542 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
543 /*
544 * Hopefully this doesn't happen very often:
545 * - messing with the bits of pd pointers without changing the physical address
546 */
547 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
548 const unsigned iShw = off / sizeof(X86PDPE);
549 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
550 if (uPml4e & X86_PML4E_P)
551 {
552 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
553 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
554 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
555 }
556 /* paranoia / a bit assumptive. */
557 if ( (off & 7)
558 && (off & 7) + cbWrite > sizeof(X86PDPE))
559 {
560 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
561 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
562 if (uPml4e2 & X86_PML4E_P)
563 {
564 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
565 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
566 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
567 }
568 }
569 break;
570 }
571
572#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
573 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
574 {
575 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
576 const unsigned iShw = off / sizeof(EPTPML4E);
577 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
578 if (uPml4e & EPT_PRESENT_MASK)
579 {
580 Log7Func(("PML4 iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPml4e, pPage->GCPhys));
581 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
582 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
583 }
584
585 /* paranoia / a bit assumptive. */
586 if ( (off & 7)
587 && (off & 7) + cbWrite > sizeof(X86PML4E))
588 {
589 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
590 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
591 if (uPml4e2 & EPT_PRESENT_MASK)
592 {
593 Log7Func(("PML4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
594 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
595 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
596 }
597 }
598 break;
599 }
600
601 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
602 {
603 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
604 const unsigned iShw = off / sizeof(EPTPDPTE);
605 X86PGPAEUINT const uPdpte = uShw.pEptPdpt->a[iShw].u;
606 if (uPdpte & EPT_PRESENT_MASK)
607 {
608 Log7Func(("EPT PDPT iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPdpte, pPage->GCPhys));
609 pgmPoolFree(pVM, uPdpte & EPT_PDPTE_PG_MASK, pPage->idx, iShw);
610 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw].u, 0);
611 }
612
613 /* paranoia / a bit assumptive. */
614 if ( (off & 7)
615 && (off & 7) + cbWrite > sizeof(EPTPDPTE))
616 {
617 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDPTE);
618 X86PGPAEUINT const uPdpte2 = uShw.pEptPdpt->a[iShw2].u;
619 if (uPdpte2 & EPT_PRESENT_MASK)
620 {
621 Log7Func(("EPT PDPT iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpte2));
622 pgmPoolFree(pVM, uPdpte2 & EPT_PDPTE_PG_MASK, pPage->idx, iShw2);
623 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw2].u, 0);
624 }
625 }
626 break;
627 }
628
629 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
630 {
631 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
632 const unsigned iShw = off / sizeof(EPTPDE);
633 X86PGPAEUINT const uPde = uShw.pEptPd->a[iShw].u;
634 if (uPde & EPT_PRESENT_MASK)
635 {
636 Assert(!(uPde & EPT_E_LEAF));
637 Log7Func(("EPT PD iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPde, pPage->GCPhys));
638 pgmPoolFree(pVM, uPde & EPT_PDE_PG_MASK, pPage->idx, iShw);
639 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw].u, 0);
640 }
641
642 /* paranoia / a bit assumptive. */
643 if ( (off & 7)
644 && (off & 7) + cbWrite > sizeof(EPTPDE))
645 {
646 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDE);
647 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPd->a));
648 X86PGPAEUINT const uPde2 = uShw.pEptPd->a[iShw2].u;
649 if (uPde2 & EPT_PRESENT_MASK)
650 {
651 Assert(!(uPde2 & EPT_E_LEAF));
652 Log7Func(("EPT PD (2): iShw2=%#x: %RX64 (%RGp) -> freeing it!\n", iShw2, uPde2, pPage->GCPhys));
653 pgmPoolFree(pVM, uPde2 & EPT_PDE_PG_MASK, pPage->idx, iShw2);
654 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw2].u, 0);
655 }
656 }
657 break;
658 }
659
660 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
661 {
662 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
663 const unsigned iShw = off / sizeof(EPTPTE);
664 X86PGPAEUINT const uPte = uShw.pEptPt->a[iShw].u;
665 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
666 if (uPte & EPT_PRESENT_MASK)
667 {
668 EPTPTE GstPte;
669 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
670 AssertRC(rc);
671
672 Log7Func(("EPT PT: iShw=%#x %RX64 (%RGp)\n", iShw, uPte, pPage->GCPhys));
673 pgmPoolTracDerefGCPhysHint(pPool, pPage,
674 uShw.pEptPt->a[iShw].u & EPT_PTE_PG_MASK,
675 GstPte.u & EPT_PTE_PG_MASK,
676 iShw);
677 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw].u, 0);
678 }
679
680 /* paranoia / a bit assumptive. */
681 if ( (off & 7)
682 && (off & 7) + cbWrite > sizeof(EPTPTE))
683 {
684 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPTE);
685 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPt->a));
686 X86PGPAEUINT const uPte2 = uShw.pEptPt->a[iShw2].u;
687 if (uPte2 & EPT_PRESENT_MASK)
688 {
689 EPTPTE GstPte;
690 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
691 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
692 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
693 AssertRC(rc);
694 Log7Func(("EPT PT (2): iShw=%#x %RX64 (%RGp)\n", iShw2, uPte2, pPage->GCPhys));
695 pgmPoolTracDerefGCPhysHint(pPool, pPage,
696 uShw.pEptPt->a[iShw2].u & EPT_PTE_PG_MASK,
697 GstPte.u & EPT_PTE_PG_MASK,
698 iShw2);
699 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw2].u, 0);
700 }
701 }
702 break;
703 }
704#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
705
706 default:
707 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
708 }
709 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
710
711 /* next */
712 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
713 return;
714 pPage = &pPool->aPages[pPage->iMonitoredNext];
715 }
716}
717
718#ifndef IN_RING3
719
720/**
721 * Checks if an access could be a fork operation in progress.
722 *
723 * Meaning that the guest is setting up the parent process for Copy-On-Write.
724 *
725 * @returns true if it's likely that we're forking, otherwise false.
726 * @param pPool The pool.
727 * @param pDis The disassembled instruction.
728 * @param offFault The access offset.
729 */
730DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISSTATE pDis, unsigned offFault)
731{
732 /*
733 * i386 linux is using btr to clear X86_PTE_RW.
734 * The functions involved are (2.6.16 source inspection):
735 * clear_bit
736 * ptep_set_wrprotect
737 * copy_one_pte
738 * copy_pte_range
739 * copy_pmd_range
740 * copy_pud_range
741 * copy_page_range
742 * dup_mmap
743 * dup_mm
744 * copy_mm
745 * copy_process
746 * do_fork
747 */
748 if ( pDis->pCurInstr->uOpcode == OP_BTR
749 && !(offFault & 4)
750 /** @todo Validate that the bit index is X86_PTE_RW. */
751 )
752 {
753 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
754 return true;
755 }
756 return false;
757}
758
759
760/**
761 * Determine whether the page is likely to have been reused.
762 *
763 * @returns true if we consider the page as being reused for a different purpose.
764 * @returns false if we consider it to still be a paging page.
765 * @param pVM The cross context VM structure.
766 * @param pVCpu The cross context virtual CPU structure.
767 * @param pCtx Pointer to the register context for the CPU.
768 * @param pDis The disassembly info for the faulting instruction.
769 * @param pvFault The fault address.
770 * @param pPage The pool page being accessed.
771 *
772 * @remark The REP prefix check is left to the caller because of STOSD/W.
773 */
774DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTX pCtx, PDISSTATE pDis, RTGCPTR pvFault,
775 PPGMPOOLPAGE pPage)
776{
777 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
778 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
779 if (pPage->cLocked)
780 {
781 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused, because it's locked!\n", pvFault, pPage));
782 return false;
783 }
784
785 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
786 if ( HMHasPendingIrq(pVM)
787 && pCtx->rsp - pvFault < 32)
788 {
789 /* Fault caused by stack writes while trying to inject an interrupt event. */
790 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pCtx->rsp));
791 return true;
792 }
793
794 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pCtx->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.arch.x86.Base.idxGenReg));
795
796 /* Non-supervisor mode write means it's used for something else. */
797 if (CPUMGetGuestCPL(pVCpu) == 3)
798 return true;
799
800 switch (pDis->pCurInstr->uOpcode)
801 {
802 /* call implies the actual push of the return address faulted */
803 case OP_CALL:
804 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
805 return true;
806 case OP_PUSH:
807 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
808 return true;
809 case OP_PUSHF:
810 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
811 return true;
812 case OP_PUSHA:
813 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
814 return true;
815 case OP_FXSAVE:
816 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
817 return true;
818 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
819 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
820 return true;
821 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
822 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
823 return true;
824 case OP_MOVSWD:
825 case OP_STOSWD:
826 if ( pDis->arch.x86.fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
827 && pCtx->rcx >= 0x40
828 )
829 {
830 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
831
832 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
833 return true;
834 }
835 break;
836
837 default:
838 /*
839 * Anything having ESP on the left side means stack writes.
840 */
841 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
842 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
843 && (pDis->Param1.arch.x86.Base.idxGenReg == DISGREG_ESP))
844 {
845 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
846 return true;
847 }
848 break;
849 }
850
851 /*
852 * Page table updates are very, very unlikely to cross page boundaries,
853 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
854 */
855 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
856 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
857 {
858 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
859 return true;
860 }
861
862 /*
863 * Nobody does an unaligned 8-byte write to a page table, right?
864 */
865 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
866 {
867 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
868 return true;
869 }
870
871 return false;
872}
873
874
875/**
876 * Flushes the page being accessed.
877 *
878 * @returns VBox status code suitable for scheduling.
879 * @param pVM The cross context VM structure.
880 * @param pVCpu The cross context virtual CPU structure.
881 * @param pPool The pool.
882 * @param pPage The pool page (head).
883 * @param pDis The disassembly of the write instruction.
884 * @param pCtx Pointer to the register context for the CPU.
885 * @param GCPhysFault The fault address as guest physical address.
886 * @todo VBOXSTRICTRC
887 */
888static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
889 PCPUMCTX pCtx, RTGCPHYS GCPhysFault)
890{
891 NOREF(pVM); NOREF(GCPhysFault);
892
893 /*
894 * First, do the flushing.
895 */
896 pgmPoolMonitorChainFlush(pPool, pPage);
897
898 /*
899 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
900 * Must do this in raw mode (!); XP boot will fail otherwise.
901 */
902 int rc = VINF_SUCCESS;
903 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pCtx->rip);
904 if (rc2 == VINF_SUCCESS)
905 { /* do nothing */ }
906 else if (rc2 == VINF_EM_RESCHEDULE)
907 {
908 rc = VBOXSTRICTRC_VAL(rc2);
909# ifndef IN_RING3
910 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
911# endif
912 }
913 else if (rc2 == VERR_EM_INTERPRETER)
914 {
915 rc = VINF_EM_RAW_EMULATE_INSTR;
916 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
917 }
918 else if (RT_FAILURE_NP(rc2))
919 rc = VBOXSTRICTRC_VAL(rc2);
920 else
921 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
922
923 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
924 return rc;
925}
926
927
928/**
929 * Handles the STOSD write accesses.
930 *
931 * @returns VBox status code suitable for scheduling.
932 * @param pVM The cross context VM structure.
933 * @param pPool The pool.
934 * @param pPage The pool page (head).
935 * @param pDis The disassembly of the write instruction.
936 * @param pCtx Pointer to the register context for the CPU.
937 * @param GCPhysFault The fault address as guest physical address.
938 * @param pvFault The fault address.
939 */
940DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
941 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
942{
943 unsigned uIncrement = pDis->Param1.arch.x86.cb;
944 NOREF(pVM);
945
946 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
947 Assert(pCtx->rcx <= 0x20);
948
949# ifdef VBOX_STRICT
950 if (pDis->arch.x86.uOpMode == DISCPUMODE_32BIT)
951 Assert(uIncrement == 4);
952 else
953 Assert(uIncrement == 8);
954# endif
955
956 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
957
958 /*
959 * Increment the modification counter and insert it into the list
960 * of modified pages the first time.
961 */
962 if (!pPage->cModifications++)
963 pgmPoolMonitorModifiedInsert(pPool, pPage);
964
965 /*
966 * Execute REP STOSD.
967 *
968 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
969 * write situation, meaning that it's safe to write here.
970 */
971 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
972 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
973 while (pCtx->rcx)
974 {
975 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
976 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pCtx->rax, uIncrement);
977 pu32 += uIncrement;
978 GCPhysFault += uIncrement;
979 pCtx->rdi += uIncrement;
980 pCtx->rcx--;
981 }
982 pCtx->rip += pDis->cbInstr;
983
984 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
985 return VINF_SUCCESS;
986}
987
988
989/**
990 * Handles the simple write accesses.
991 *
992 * @returns VBox status code suitable for scheduling.
993 * @param pVM The cross context VM structure.
994 * @param pVCpu The cross context virtual CPU structure.
995 * @param pPool The pool.
996 * @param pPage The pool page (head).
997 * @param pDis The disassembly of the write instruction.
998 * @param pCtx Pointer to the register context for the CPU.
999 * @param GCPhysFault The fault address as guest physical address.
1000 * @param pfReused Reused state (in/out)
1001 */
1002DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
1003 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, bool *pfReused)
1004{
1005 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
1006 NOREF(pVM);
1007 NOREF(pfReused); /* initialized by caller */
1008
1009 /*
1010 * Increment the modification counter and insert it into the list
1011 * of modified pages the first time.
1012 */
1013 if (!pPage->cModifications++)
1014 pgmPoolMonitorModifiedInsert(pPool, pPage);
1015
1016 /*
1017 * Clear all the pages.
1018 */
1019 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
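 /* pgmPoolMonitorChainChanging handles at most one entry plus a spill into the next one,
    so wider writes are fed to it in chunks of no more than 8 bytes. */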
1020 if (cbWrite <= 8)
1021 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1022 else if (cbWrite <= 16)
1023 {
1024 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1025 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1026 }
1027 else
1028 {
1029 Assert(cbWrite <= 32);
1030 for (uint32_t off = 0; off < cbWrite; off += 8)
1031 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1032 }
1033
1034 /*
1035 * Interpret the instruction.
1036 */
1037 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pCtx->rip);
1038 if (RT_SUCCESS(rc))
1039 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1040 else if (rc == VERR_EM_INTERPRETER)
1041 {
1042 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
1043 pCtx->cs.Sel, (RTGCPTR)pCtx->rip, pDis->pCurInstr->uOpcode));
1044 rc = VINF_EM_RAW_EMULATE_INSTR;
1045 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
1046 }
1047
1048# if 0 /* experimental code */
1049 if (rc == VINF_SUCCESS)
1050 {
1051 switch (pPage->enmKind)
1052 {
1053 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1054 {
1055 X86PTEPAE GstPte;
1056 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1057 AssertRC(rc);
1058
1059 /* Check the new value written by the guest. If present and with a bogus physical address, then
1060 * it's fairly safe to assume the guest is reusing the PT.
1061 */
1062 if (GstPte.n.u1Present)
1063 {
1064 RTHCPHYS HCPhys = -1;
1065 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1066 if (rc != VINF_SUCCESS)
1067 {
1068 *pfReused = true;
1069 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1070 }
1071 }
1072 break;
1073 }
1074 }
1075 }
1076# endif
1077
1078 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1079 return VBOXSTRICTRC_VAL(rc);
1080}
1081
1082
1083/**
1084 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1085 * \#PF access handler callback for page table pages.}
1086 *
1087 * @remarks The @a uUser argument is the index of the PGMPOOLPAGE.
1088 */
1089DECLCALLBACK(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTX pCtx,
1090 RTGCPTR pvFault, RTGCPHYS GCPhysFault, uint64_t uUser)
1091{
1092 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
1093 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1094 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1095 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1096 unsigned cMaxModifications;
1097 bool fForcedFlush = false;
1098 RT_NOREF_PV(uErrorCode);
1099
1100# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1101 AssertMsg(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT,
1102 ("pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1103# endif
1104 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1105
1106 PGM_LOCK_VOID(pVM);
1107 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1108 {
1109 /* Pool page changed while we were waiting for the lock; ignore. */
1110 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1111 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1112 PGM_UNLOCK(pVM);
1113 return VINF_SUCCESS;
1114 }
1115# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1116 if (pPage->fDirty)
1117 {
1118# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1119 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1120# endif
1121 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1122 PGM_UNLOCK(pVM);
1123 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1124 }
1125# endif
1126
1127# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1128 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1129 {
1130 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1131 void *pvGst;
1132 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1133 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1134 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1135 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1136 }
1137# endif
1138
1139# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1140 if (PGMPOOL_PAGE_IS_NESTED(pPage))
1141 {
1142 Assert(!CPUMIsGuestInVmxNonRootMode(CPUMQueryGuestCtxPtr(pVCpu)));
1143 Log7Func(("Flushing pvFault=%RGv GCPhysFault=%RGp\n", pvFault, GCPhysFault));
1144 pgmPoolMonitorChainFlush(pPool, pPage);
1145 PGM_UNLOCK(pVM);
1146 return VINF_SUCCESS;
1147 }
1148# endif
1149
1150 /*
1151 * Disassemble the faulting instruction.
1152 */
1153 PDISSTATE pDis = &pVCpu->pgm.s.Dis;
1154 int rc = EMInterpretDisasCurrent(pVCpu, pDis, NULL);
1155 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1156 {
1157 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1158 PGM_UNLOCK(pVM);
1159 return rc;
1160 }
1161
1162 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1163
1164 /*
1165 * We should ALWAYS have the list head as user parameter. This
1166 * is because we use that page to record the changes.
1167 */
1168 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1169
1170# ifdef IN_RING0
1171 /* Maximum nr of modifications depends on the page type. */
1172 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1173 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1174 cMaxModifications = 4;
1175 else
1176 cMaxModifications = 24;
1177# else
1178 cMaxModifications = 48;
1179# endif
1180
1181 /*
1182 * Incremental page table updates should weigh more than random ones.
1183 * (Only applies when started from offset 0)
1184 */
1185 pVCpu->pgm.s.cPoolAccessHandler++;
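 /* A write landing right after the previous fault address, from (almost) the same RIP and
    on the very next handler invocation, looks like a sequential page table initialization;
    double the modification count so such pages get flushed sooner. */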
1186 if ( pPage->GCPtrLastAccessHandlerRip >= pCtx->rip - 0x40 /* observed loops in Windows 7 x64 */
1187 && pPage->GCPtrLastAccessHandlerRip < pCtx->rip + 0x40
1188 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.arch.x86.cb)
1189 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1190 {
1191 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1192 Assert(pPage->cModifications < 32000);
1193 pPage->cModifications = pPage->cModifications * 2;
1194 pPage->GCPtrLastAccessHandlerFault = pvFault;
1195 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1196 if (pPage->cModifications >= cMaxModifications)
1197 {
1198 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1199 fForcedFlush = true;
1200 }
1201 }
1202
1203 if (pPage->cModifications >= cMaxModifications)
1204 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1205
1206 /*
1207 * Check if it's worth dealing with.
1208 */
1209 bool fReused = false;
1210 bool fNotReusedNotForking = false;
1211 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1212 || pgmPoolIsPageLocked(pPage)
1213 )
1214 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pCtx, pDis, pvFault, pPage))
1215 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1216 {
1217 /*
1218 * Simple instructions, no REP prefix.
1219 */
1220 if (!(pDis->arch.x86.fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1221 {
1222 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pCtx, GCPhysFault, &fReused);
1223 if (fReused)
1224 goto flushPage;
1225
1226 /* A mov instruction to change the first page table entry will be remembered so we can detect
1227 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1228 */
1229 if ( rc == VINF_SUCCESS
1230 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1231 && pDis->pCurInstr->uOpcode == OP_MOV
1232 && (pvFault & PAGE_OFFSET_MASK) == 0)
1233 {
1234 pPage->GCPtrLastAccessHandlerFault = pvFault;
1235 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1236 pPage->GCPtrLastAccessHandlerRip = pCtx->rip;
1237 /* Make sure we don't kick out a page too quickly. */
1238 if (pPage->cModifications > 8)
1239 pPage->cModifications = 2;
1240 }
1241 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1242 {
1243 /* ignore the 2nd write to this page table entry. */
1244 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1245 }
1246 else
1247 {
1248 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1249 pPage->GCPtrLastAccessHandlerRip = 0;
1250 }
1251
1252 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1253 PGM_UNLOCK(pVM);
1254 return rc;
1255 }
1256
1257 /*
1258 * Windows is frequently doing small memset() operations (netio test 4k+).
1259 * We have to deal with these or we'll kill the cache and performance.
1260 */
1261 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1262 && !pCtx->eflags.Bits.u1DF
1263 && pDis->arch.x86.uOpMode == pDis->uCpuMode
1264 && pDis->arch.x86.uAddrMode == pDis->uCpuMode)
1265 {
1266 bool fValidStosd = false;
1267
1268 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1269 && pDis->arch.x86.fPrefix == DISPREFIX_REP
1270 && pCtx->ecx <= 0x20
1271 && pCtx->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1272 && !((uintptr_t)pvFault & 3)
1273 && (pCtx->eax == 0 || pCtx->eax == 0x80) /* the two values observed. */
1274 )
1275 {
1276 fValidStosd = true;
1277 pCtx->rcx &= 0xffffffff; /* paranoia */
1278 }
1279 else
1280 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1281 && pDis->arch.x86.fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1282 && pCtx->rcx <= 0x20
1283 && pCtx->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1284 && !((uintptr_t)pvFault & 7)
1285 && (pCtx->rax == 0 || pCtx->rax == 0x80) /* the two values observed. */
1286 )
1287 {
1288 fValidStosd = true;
1289 }
1290
1291 if (fValidStosd)
1292 {
1293 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pCtx, GCPhysFault, pvFault);
1294 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1295 PGM_UNLOCK(pVM);
1296 return rc;
1297 }
1298 }
1299
1300 /* REP prefix, don't bother. */
1301 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1302 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1303 pCtx->eax, pCtx->ecx, pCtx->edi, pCtx->esi, (RTGCPTR)pCtx->rip, pDis->pCurInstr->uOpcode, pDis->arch.x86.fPrefix));
1304 fNotReusedNotForking = true;
1305 }
1306
1307# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1308 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1309 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1310 */
1311 if ( pPage->cModifications >= cMaxModifications
1312 && !fForcedFlush
1313 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1314 && ( fNotReusedNotForking
1315 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pCtx, pDis, pvFault, pPage)
1316 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1317 )
1318 )
1319 {
1320 Assert(!pgmPoolIsPageLocked(pPage));
1321 Assert(pPage->fDirty == false);
1322
1323 /* Flush any monitored duplicates as we will disable write protection. */
1324 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1325 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1326 {
1327 PPGMPOOLPAGE pPageHead = pPage;
1328
1329 /* Find the monitor head. */
1330 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1331 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1332
1333 while (pPageHead)
1334 {
1335 unsigned idxNext = pPageHead->iMonitoredNext;
1336
1337 if (pPageHead != pPage)
1338 {
1339 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1340 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1341 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1342 AssertRC(rc2);
1343 }
1344
1345 if (idxNext == NIL_PGMPOOL_IDX)
1346 break;
1347
1348 pPageHead = &pPool->aPages[idxNext];
1349 }
1350 }
1351
1352 /* The flushing above might fail for locked pages, so double check. */
1353 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1354 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1355 {
1356 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1357
1358 /* Temporarily allow write access to the page table again. */
1359 rc = PGMHandlerPhysicalPageTempOff(pVM,
1360 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK,
1361 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1362 if (rc == VINF_SUCCESS)
1363 {
1364 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1365 AssertMsg(rc == VINF_SUCCESS
1366 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1367 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1368 || rc == VERR_PAGE_NOT_PRESENT,
1369 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1370# ifdef VBOX_STRICT
1371 pPage->GCPtrDirtyFault = pvFault;
1372# endif
1373
1374 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1375 PGM_UNLOCK(pVM);
1376 return rc;
1377 }
1378 }
1379 }
1380# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1381
1382 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1383flushPage:
1384 /*
1385 * Not worth it, so flush it.
1386 *
1387 * If we considered it to be reused, don't go back to ring-3
1388 * to emulate failed instructions since we usually cannot
1389 * interpret them. This may be a bit risky, in which case
1390 * the reuse detection must be fixed.
1391 */
1392 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pCtx, GCPhysFault);
1393 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1394 && fReused)
1395 {
1396 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1397 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1398 if (PGMShwGetPage(pVCpu, pCtx->rip, NULL, NULL) == VINF_SUCCESS)
1399 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1400 }
1401 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1402 PGM_UNLOCK(pVM);
1403 return rc;
1404}
1405
1406#endif /* !IN_RING3 */
1407
1408/**
1409 * @callback_method_impl{FNPGMPHYSHANDLER,
1410 * Access handler for shadowed page table pages.}
1411 *
1412 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1413 * @note The @a uUser argument is the index of the PGMPOOLPAGE.
1414 */
1415DECLCALLBACK(VBOXSTRICTRC)
1416pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1417 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, uint64_t uUser)
1418{
1419 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1420 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1421 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1422 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1423 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1424 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1425
1426 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1427
1428 PGM_LOCK_VOID(pVM);
1429
1430#ifdef VBOX_WITH_STATISTICS
1431 /*
1432 * Collect stats on the access.
1433 */
1434 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
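 /* Buckets 0..15 count writes of 1..16 bytes; buckets 16, 17 and 18 collect the
    17-31, 32-63 and 64+ byte writes respectively. */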
1435 if (cbBuf <= 16 && cbBuf > 0)
1436 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1437 else if (cbBuf >= 17 && cbBuf < 32)
1438 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1439 else if (cbBuf >= 32 && cbBuf < 64)
1440 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1441 else if (cbBuf >= 64)
1442 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1443
1444 uint8_t cbAlign;
1445 switch (pPage->enmKind)
1446 {
1447 default:
1448 cbAlign = 7;
1449 break;
1450 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1451 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1452 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1453 case PGMPOOLKIND_32BIT_PD:
1454 case PGMPOOLKIND_32BIT_PD_PHYS:
1455 cbAlign = 3;
1456 break;
1457 }
1458 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1459 if ((uint8_t)GCPhys & cbAlign)
1460 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1461#endif
1462
1463 /*
1464 * Make sure the pool page wasn't modified by a different CPU.
1465 */
1466 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1467 {
1468 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1469
1470 /* The max modification count before flushing depends on the context and page type. */
1471#ifdef IN_RING3
1472 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1473#else
1474 uint16_t cMaxModifications;
1475 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1476 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1477 cMaxModifications = 4;
1478 else
1479 cMaxModifications = 24;
1480#endif
1481
1482 /*
1483 * We don't have to be very sophisticated about this since there are relatively few calls here.
1484 * However, we must try our best to detect any non-CPU accesses (disk / networking).
1485 */
1486 if ( ( pPage->cModifications < cMaxModifications
1487 || pgmPoolIsPageLocked(pPage) )
1488 && enmOrigin != PGMACCESSORIGIN_DEVICE
1489 && cbBuf <= 16)
1490 {
1491 /* Clear the shadow entry. */
1492 if (!pPage->cModifications++)
1493 pgmPoolMonitorModifiedInsert(pPool, pPage);
1494
1495 if (cbBuf <= 8)
1496 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1497 else
1498 {
1499 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1500 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1501 }
1502 }
1503 else
1504 pgmPoolMonitorChainFlush(pPool, pPage);
1505
1506 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1507 }
1508 else
1509 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1510 PGM_UNLOCK(pVM);
1511 return VINF_PGM_HANDLER_DO_DEFAULT;
1512}
1513
1514
1515#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1516
1517# if defined(VBOX_STRICT) && !defined(IN_RING3)
1518
1519/**
1520 * Check references to guest physical memory in a PAE / PAE page table.
1521 *
1522 * @param pPool The pool.
1523 * @param pPage The page.
1524 * @param pShwPT The shadow page table (mapping of the page).
1525 * @param pGstPT The guest page table.
1526 */
1527static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1528{
1529 unsigned cErrors = 0;
1530 int LastRc = -1; /* initialized to shut up gcc */
1531 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1532 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1533 PVMCC pVM = pPool->CTX_SUFF(pVM);
1534
1535# ifdef VBOX_STRICT
1536 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1537 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1538# endif
1539 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1540 {
1541 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1542 {
1543 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1544 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1545 if ( rc != VINF_SUCCESS
1546 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1547 {
1548 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1549 LastPTE = i;
1550 LastRc = rc;
1551 LastHCPhys = HCPhys;
1552 cErrors++;
1553
1554 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1555 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1556 AssertRC(rc);
1557
1558 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1559 {
1560 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1561
1562 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1563 {
1564 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1565
1566 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1567 {
1568 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1569 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1570 {
1571 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1572 }
1573 }
1574
1575 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1576 }
1577 }
1578 }
1579 }
1580 }
1581 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1582}
1583
1584
1585/**
1586 * Check references to guest physical memory in a PAE / 32-bit page table.
1587 *
1588 * @param pPool The pool.
1589 * @param pPage The page.
1590 * @param pShwPT The shadow page table (mapping of the page).
1591 * @param pGstPT The guest page table.
1592 */
1593static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1594{
1595 unsigned cErrors = 0;
1596 int LastRc = -1; /* initialized to shut up gcc */
1597 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1598 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1599 PVMCC pVM = pPool->CTX_SUFF(pVM);
1600
1601# ifdef VBOX_STRICT
1602 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1603 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1604# endif
1605 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1606 {
1607 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1608 {
1609 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1610 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1611 if ( rc != VINF_SUCCESS
1612 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1613 {
1614 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1615 LastPTE = i;
1616 LastRc = rc;
1617 LastHCPhys = HCPhys;
1618 cErrors++;
1619
1620 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1621 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1622 AssertRC(rc);
1623
1624 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1625 {
1626 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1627
1628 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1629 {
1630 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1631
1632 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1633 {
1634 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1635 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1636 {
1637 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1638 }
1639 }
1640
1641 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1642 }
1643 }
1644 }
1645 }
1646 }
1647 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1648}
1649
1650# endif /* VBOX_STRICT && !IN_RING3 */
1651
1652/**
1653 * Clear references to guest physical memory in a PAE / PAE page table.
1654 *
1655 * @returns nr of changed PTEs
1656 * @param pPool The pool.
1657 * @param pPage The page.
1658 * @param pShwPT The shadow page table (mapping of the page).
1659 * @param pGstPT The guest page table.
1660 * @param pOldGstPT The old cached guest page table.
1661 * @param   fAllowRemoval   Whether to bail out as soon as an invalid (reused) PTE is encountered.
1662 * @param   pfFlush         Where to return whether the reused page table must be flushed (out).
1663 */
1664DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1665 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1666{
1667 unsigned cChanged = 0;
1668
1669# ifdef VBOX_STRICT
1670 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1671 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1672# endif
1673 *pfFlush = false;
1674
1675 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1676 {
1677 /* Check the new value written by the guest. If present and with a bogus physical address, then
1678 * it's fairly safe to assume the guest is reusing the PT.
1679 */
1680 if ( fAllowRemoval
1681 && (pGstPT->a[i].u & X86_PTE_P))
1682 {
1683 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1684 {
1685 *pfFlush = true;
1686 return ++cChanged;
1687 }
1688 }
1689 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1690 {
1691 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1692 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1693 {
1694# ifdef VBOX_STRICT
1695                RTHCPHYS HCPhys = NIL_RTHCPHYS;
1696 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1697 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1698# endif
1699 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1700 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1701 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1702 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1703
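                    /* Note: fHostRW <= fGuestRW (rather than equality) because the shadow PTE may
                       legitimately be more restrictive than the guest one, e.g. write access removed
                       for monitoring or dirty tracking; only a more permissive shadow matters here. */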
1704 if ( uHostAttr == uGuestAttr
1705 && fHostRW <= fGuestRW)
1706 continue;
1707 }
1708 cChanged++;
1709 /* Something was changed, so flush it. */
1710            Log4(("pgmPoolTrackFlushPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1711 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1712 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1713 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1714 }
1715 }
1716 return cChanged;
1717}
1718
1719
1720/**
1721 * Clear references to guest physical memory in a PAE / 32-bit page table.
1722 *
1723 * @returns nr of changed PTEs
1724 * @param pPool The pool.
1725 * @param pPage The page.
1726 * @param pShwPT The shadow page table (mapping of the page).
1727 * @param pGstPT The guest page table.
1728 * @param pOldGstPT The old cached guest page table.
1729 * @param   fAllowRemoval   Whether to bail out as soon as an invalid (reused) PTE is encountered.
1730 * @param   pfFlush         Where to return whether the reused page table must be flushed (out).
1731 */
1732DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1733 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1734{
1735 unsigned cChanged = 0;
1736
1737# ifdef VBOX_STRICT
1738 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1739 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1740# endif
1741 *pfFlush = false;
1742
1743 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1744 {
1745 /* Check the new value written by the guest. If present and with a bogus physical address, then
1746 * it's fairly safe to assume the guest is reusing the PT. */
1747 if (fAllowRemoval)
1748 {
1749 X86PGUINT const uPte = pGstPT->a[i].u;
1750 if ( (uPte & X86_PTE_P)
1751 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1752 {
1753 *pfFlush = true;
1754 return ++cChanged;
1755 }
1756 }
1757 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1758 {
1759 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1760 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1761 {
1762# ifdef VBOX_STRICT
1763                RTHCPHYS HCPhys = NIL_RTHCPHYS;
1764 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1765 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1766# endif
1767 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1768 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1769 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1770 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1771
1772 if ( uHostAttr == uGuestAttr
1773 && fHostRW <= fGuestRW)
1774 continue;
1775 }
1776 cChanged++;
1777 /* Something was changed, so flush it. */
1778            Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1779 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1780 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1781 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1782 }
1783 }
1784 return cChanged;
1785}
1786
1787
1788/**
1789 * Flush a dirty page
1790 *
1791 * @param pVM The cross context VM structure.
1792 * @param pPool The pool.
1793 * @param idxSlot Dirty array slot index
1794 * @param fAllowRemoval Allow a reused page table to be removed
1795 */
1796static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1797{
1798 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1799
1800 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1801 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1802 if (idxPage == NIL_PGMPOOL_IDX)
1803 return;
1804
1805 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1806 Assert(pPage->idx == idxPage);
1807 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1808
1809 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1810 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1811
1812 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1813 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1814 Assert(rc == VINF_SUCCESS);
1815 pPage->fDirty = false;
1816
1817# ifdef VBOX_STRICT
1818 uint64_t fFlags = 0;
1819 RTHCPHYS HCPhys;
1820 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1821 AssertMsg( ( rc == VINF_SUCCESS
1822 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1823 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1824 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1825 || rc == VERR_PAGE_NOT_PRESENT,
1826 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1827# endif
1828
1829 /* Flush those PTEs that have changed. */
1830 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1831 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1832 void *pvGst;
1833 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1834 bool fFlush;
1835 unsigned cChanges;
1836
1837 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1838 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1839 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1840 else
1841 {
1842 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1843 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1844 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1845 }
1846
1847 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1848 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1849 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1850 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1851
1852 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1853 Assert(pPage->cModifications);
1854 if (cChanges < 4)
1855 pPage->cModifications = 1; /* must use > 0 here */
1856 else
1857 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1858
1859 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
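    /* If the dirty array was full, the slot we just released becomes the next free slot. */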
1860 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1861 pPool->idxFreeDirtyPage = idxSlot;
1862
1863 pPool->cDirtyPages--;
1864 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1865 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1866 if (fFlush)
1867 {
1868 Assert(fAllowRemoval);
1869 Log(("Flush reused page table!\n"));
1870 pgmPoolFlushPage(pPool, pPage);
1871 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1872 }
1873 else
1874 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1875}
1876
1877
1878# ifndef IN_RING3
1879/**
1880 * Add a new dirty page
1881 *
1882 * @param pVM The cross context VM structure.
1883 * @param pPool The pool.
1884 * @param pPage The page.
1885 */
1886void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1887{
1888 PGM_LOCK_ASSERT_OWNER(pVM);
1889 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1890 Assert(!pPage->fDirty);
1891 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1892
1893 unsigned idxFree = pPool->idxFreeDirtyPage;
1894 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1895 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1896
1897 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1898 {
1899 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1900 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1901 }
1902 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1903 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1904
1905 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1906
1907 /*
1908 * Make a copy of the guest page table as we require valid GCPhys addresses
1909 * when removing references to physical pages.
1910 * (The HCPhys linear lookup is *extremely* expensive!)
1911 */
1912 void *pvGst;
1913 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1914 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1915 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
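    /* The PAE guest PT fills a whole page; in the 32-bit case only half the page is copied, presumably
       because a single PAE shadow PT (512 entries) only covers half of the 4 MB the 32-bit guest PT maps. */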
1916# ifdef VBOX_STRICT
1917 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1918 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1919 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1920 else
1921 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1922 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1923# endif
1924 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1925
1926 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1927 pPage->fDirty = true;
1928 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1929 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1930 pPool->cDirtyPages++;
1931
1932 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
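    /* The '& (RT_ELEMENTS() - 1)' wrap-around relies on the dirty array size being a power of two
       (8 or 16, see the AssertCompile above); it behaves like a modulo. */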
1933 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1934 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1935 {
1936 unsigned i;
1937 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1938 {
1939 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1940 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1941 {
1942 pPool->idxFreeDirtyPage = idxFree;
1943 break;
1944 }
1945 }
1946 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1947 }
1948
1949 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1950
1951 /*
1952 * Clear all references to this shadow table. See @bugref{7298}.
1953 */
1954 pgmPoolTrackClearPageUsers(pPool, pPage);
1955}
1956# endif /* !IN_RING3 */
1957
1958
1959/**
1960 * Check if the specified page is dirty (not write monitored)
1961 *
1962 * @return dirty or not
1963 * @param pVM The cross context VM structure.
1964 * @param GCPhys Guest physical address
1965 */
1966bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1967{
1968 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1969 PGM_LOCK_ASSERT_OWNER(pVM);
1970 if (!pPool->cDirtyPages)
1971 return false;
1972
1973 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1974
1975 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1976 {
1977 unsigned idxPage = pPool->aidxDirtyPages[i];
1978 if (idxPage != NIL_PGMPOOL_IDX)
1979 {
1980 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1981 if (pPage->GCPhys == GCPhys)
1982 return true;
1983 }
1984 }
1985 return false;
1986}
1987
1988
1989/**
1990 * Reset all dirty pages by reinstating page monitoring.
1991 *
1992 * @param pVM The cross context VM structure.
1993 */
1994void pgmPoolResetDirtyPages(PVMCC pVM)
1995{
1996 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1997 PGM_LOCK_ASSERT_OWNER(pVM);
1998 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1999
2000 if (!pPool->cDirtyPages)
2001 return;
2002
2003 Log(("pgmPoolResetDirtyPages\n"));
2004 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2005 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
2006
2007 pPool->idxFreeDirtyPage = 0;
2008 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2009 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2010 {
2011 unsigned i;
2012 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2013 {
2014 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2015 {
2016 pPool->idxFreeDirtyPage = i;
2017 break;
2018 }
2019 }
2020 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2021 }
2022
2023 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
2024 return;
2025}
2026
2027
2028/**
2029 * Invalidate the PT entry for the specified page
2030 *
2031 * @param pVM The cross context VM structure.
2032 * @param GCPtrPage Guest page to invalidate
2033 */
2034void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
2035{
2036 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2037 PGM_LOCK_ASSERT_OWNER(pVM);
2038 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2039
2040 if (!pPool->cDirtyPages)
2041 return;
2042
2043 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2044 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2045 {
2046 /** @todo What was intended here??? This looks incomplete... */
2047 }
2048}
2049
2050
2051/**
2052 * Invalidate (flush) the dirty page tracking for the specified guest page table.
2053 *
2054 * @param pVM The cross context VM structure.
2055 * @param GCPhysPT Physical address of the page table
2056 */
2057void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
2058{
2059 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2060 PGM_LOCK_ASSERT_OWNER(pVM);
2061 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2062 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2063
2064 if (!pPool->cDirtyPages)
2065 return;
2066
2067 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2068
2069 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2070 {
2071 unsigned idxPage = pPool->aidxDirtyPages[i];
2072 if (idxPage != NIL_PGMPOOL_IDX)
2073 {
2074 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2075 if (pPage->GCPhys == GCPhysPT)
2076 {
2077 idxDirtyPage = i;
2078 break;
2079 }
2080 }
2081 }
2082
2083 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2084 {
2085 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2086 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2087 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2088 {
2089 unsigned i;
2090 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2091 {
2092 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2093 {
2094 pPool->idxFreeDirtyPage = i;
2095 break;
2096 }
2097 }
2098 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2099 }
2100 }
2101}
2102
2103#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2104
2105/**
2106 * Inserts a page into the GCPhys hash table.
2107 *
2108 * @param pPool The pool.
2109 * @param pPage The page.
2110 */
2111DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2112{
2113 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2114 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
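    /* PGMPOOL_HASH presumably folds the guest page frame number into the hash table size, i.e.
       something along the lines of ((GCPhys >> GUEST_PAGE_SHIFT) & (PGMPOOL_HASH_SIZE - 1)), so all
       pool pages shadowing the same guest page land in the same bucket. */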
2115 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2116 pPage->iNext = pPool->aiHash[iHash];
2117 pPool->aiHash[iHash] = pPage->idx;
2118}
2119
2120
2121/**
2122 * Removes a page from the GCPhys hash table.
2123 *
2124 * @param pPool The pool.
2125 * @param pPage The page.
2126 */
2127DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2128{
2129 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2130 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2131 if (pPool->aiHash[iHash] == pPage->idx)
2132 pPool->aiHash[iHash] = pPage->iNext;
2133 else
2134 {
2135 uint16_t iPrev = pPool->aiHash[iHash];
2136 for (;;)
2137 {
2138 const int16_t i = pPool->aPages[iPrev].iNext;
2139 if (i == pPage->idx)
2140 {
2141 pPool->aPages[iPrev].iNext = pPage->iNext;
2142 break;
2143 }
2144 if (i == NIL_PGMPOOL_IDX)
2145 {
2146 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2147 break;
2148 }
2149 iPrev = i;
2150 }
2151 }
2152 pPage->iNext = NIL_PGMPOOL_IDX;
2153}
2154
2155
2156/**
2157 * Frees up one cache page.
2158 *
2159 * @returns VBox status code.
2160 * @retval VINF_SUCCESS on success.
2161 * @param pPool The pool.
2162 * @param iUser The user index.
2163 */
2164static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2165{
2166#ifndef VBOX_VMM_TARGET_ARMV8
2167 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2168#endif
2169    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
2170 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2171
2172 /*
2173 * Select one page from the tail of the age list.
2174 */
2175 PPGMPOOLPAGE pPage;
2176 for (unsigned iLoop = 0; ; iLoop++)
2177 {
2178 uint16_t iToFree = pPool->iAgeTail;
2179 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2180 iToFree = pPool->aPages[iToFree].iAgePrev;
2181/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2182 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2183 {
2184 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2185 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2186 {
2187 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2188 continue;
2189 iToFree = i;
2190 break;
2191 }
2192 }
2193*/
2194 Assert(iToFree != iUser);
2195 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2196 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2197 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2198 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2199 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2200 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2201
2202 pPage = &pPool->aPages[iToFree];
2203
2204 /*
2205 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2206 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2207 */
2208 if ( !pgmPoolIsPageLocked(pPage)
2209 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2210 break;
2211 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2212 pgmPoolCacheUsed(pPool, pPage);
2213 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2214 }
2215
2216 /*
2217 * Found a usable page, flush it and return.
2218 */
2219 int rc = pgmPoolFlushPage(pPool, pPage);
2220 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2221 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2222 if (rc == VINF_SUCCESS)
2223 PGM_INVL_ALL_VCPU_TLBS(pVM);
2224 return rc;
2225}
2226
2227
2228/**
2229 * Checks if a kind mismatch is really a page being reused
2230 * or just a normal remapping.
2231 *
2232 * @returns true if reused and the cached page (enmKind1) should be flushed
2233 * @returns false if not reused.
2234 * @param enmKind1 The kind of the cached page.
2235 * @param enmKind2 The kind of the requested page.
2236 */
2237static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2238{
2239 switch (enmKind1)
2240 {
2241 /*
2242 * Never reuse them. There is no remapping in non-paging mode.
2243 */
2244 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2245 case PGMPOOLKIND_32BIT_PD_PHYS:
2246 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2247 case PGMPOOLKIND_PAE_PD_PHYS:
2248 case PGMPOOLKIND_PAE_PDPT_PHYS:
2249 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2250 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2251 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2252 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2253 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2254 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2255 return false;
2256
2257 /*
2258 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2259 */
2260 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2261 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2262 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2263 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2264 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2265 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2266 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2267 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2268 case PGMPOOLKIND_32BIT_PD:
2269 case PGMPOOLKIND_PAE_PDPT:
2270 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2271 switch (enmKind2)
2272 {
2273 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2274 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2275 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2276 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2277 case PGMPOOLKIND_64BIT_PML4:
2278 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2279 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2280 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2281 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2282 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2283 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2284 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2285 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2286 return true;
2287 default:
2288 return false;
2289 }
2290
2291 /*
2292 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2293 */
2294 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2295 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2296 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2297 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2298 case PGMPOOLKIND_64BIT_PML4:
2299 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2300 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2301 switch (enmKind2)
2302 {
2303 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2304 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2305 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2306 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2307 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2308 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2309 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2310 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2311 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2312 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2313 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2314 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2315 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2316 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2317 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2318 return true;
2319 default:
2320 return false;
2321 }
2322
2323#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2324 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2325 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2326 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2327 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2328 return PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2);
2329
2330 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2331 return false;
2332#endif
2333
2334 /*
2335 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2336 */
2337 case PGMPOOLKIND_ROOT_NESTED:
2338 return false;
2339
2340 default:
2341 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2342 }
2343}
2344
2345
2346/**
2347 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2348 *
2349 * @returns VBox status code.
2350 * @retval VINF_PGM_CACHED_PAGE on success.
2351 * @retval VERR_FILE_NOT_FOUND if not found.
2352 * @param pPool The pool.
2353 * @param GCPhys The GC physical address of the page we're gonna shadow.
2354 * @param enmKind The kind of mapping.
2355 * @param enmAccess Access type for the mapping (only relevant for big pages)
2356 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2357 * @param iUser The shadow page pool index of the user table. This is
2358 * NIL_PGMPOOL_IDX for root pages.
2359 * @param iUserTable The index into the user table (shadowed). Ignored if
2360 * root page
2361 * @param ppPage Where to store the pointer to the page.
2362 */
2363static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2364 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2365{
2366 /*
2367 * Look up the GCPhys in the hash.
2368 */
2369 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2370 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2371 if (i != NIL_PGMPOOL_IDX)
2372 {
2373 do
2374 {
2375 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2376 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2377 if (pPage->GCPhys == GCPhys)
2378 {
2379 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2380 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2381 && pPage->fA20Enabled == fA20Enabled)
2382 {
2383 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2384 * doesn't flush it in case there are no more free use records.
2385 */
2386 pgmPoolCacheUsed(pPool, pPage);
2387
2388 int rc = VINF_SUCCESS;
2389 if (iUser != NIL_PGMPOOL_IDX)
2390 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2391 if (RT_SUCCESS(rc))
2392 {
2393 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2394 *ppPage = pPage;
2395 if (pPage->cModifications)
2396 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2397 STAM_COUNTER_INC(&pPool->StatCacheHits);
2398 return VINF_PGM_CACHED_PAGE;
2399 }
2400 return rc;
2401 }
2402
2403 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2404 {
2405 /*
2406 * The kind is different. In some cases we should now flush the page
2407 * as it has been reused, but in most cases this is normal remapping
2408 * of PDs as PT or big pages using the GCPhys field in a slightly
2409 * different way than the other kinds.
2410 */
2411 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2412 {
2413 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2414 pgmPoolFlushPage(pPool, pPage);
2415 break;
2416 }
2417 }
2418 }
2419
2420 /* next */
2421 i = pPage->iNext;
2422 } while (i != NIL_PGMPOOL_IDX);
2423 }
2424
2425 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2426 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2427 return VERR_FILE_NOT_FOUND;
2428}
2429
2430
2431/**
2432 * Inserts a page into the cache.
2433 *
2434 * @param pPool The pool.
2435 * @param pPage The cached page.
2436 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2437 */
2438static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2439{
2440 /*
2441 * Insert into the GCPhys hash if the page is fit for that.
2442 */
2443 Assert(!pPage->fCached);
2444 if (fCanBeCached)
2445 {
2446 pPage->fCached = true;
2447 pgmPoolHashInsert(pPool, pPage);
2448 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2449 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2450 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2451 }
2452 else
2453 {
2454 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2455 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2456 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2457 }
2458
2459 /*
2460 * Insert at the head of the age list.
2461 */
2462 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2463 pPage->iAgeNext = pPool->iAgeHead;
2464 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2465 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2466 else
2467 pPool->iAgeTail = pPage->idx;
2468 pPool->iAgeHead = pPage->idx;
2469}
2470
2471
2472/**
2473 * Flushes a cached page.
2474 *
2475 * @param pPool The pool.
2476 * @param pPage The cached page.
2477 */
2478static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2479{
2480 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2481
2482 /*
2483 * Remove the page from the hash.
2484 */
2485 if (pPage->fCached)
2486 {
2487 pPage->fCached = false;
2488 pgmPoolHashRemove(pPool, pPage);
2489 }
2490 else
2491 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2492
2493 /*
2494 * Remove it from the age list.
2495 */
2496 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2497 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2498 else
2499 pPool->iAgeTail = pPage->iAgePrev;
2500 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2501 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2502 else
2503 pPool->iAgeHead = pPage->iAgeNext;
2504 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2505 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2506}
2507
2508
2509/**
2510 * Looks for pages sharing the monitor.
2511 *
2512 * @returns Pointer to the head page.
2513 * @returns NULL if not found.
2514 * @param pPool The Pool
2515 * @param pNewPage The page which is going to be monitored.
2516 */
2517static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2518{
2519 /*
2520 * Look up the GCPhys in the hash.
2521 */
2522 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2523 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2524 if (i == NIL_PGMPOOL_IDX)
2525 return NULL;
2526 do
2527 {
2528 PPGMPOOLPAGE pPage = &pPool->aPages[i];
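        /* Unsigned wrap-around trick: GCPhys has been masked to the start of the guest page, so the
           subtraction is below PAGE_SIZE exactly when pPage->GCPhys lies within that same page. */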
2529 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2530 && pPage != pNewPage)
2531 {
2532 switch (pPage->enmKind)
2533 {
2534 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2535 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2536 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2537 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2538 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2539 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2540 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2541 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2542 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2543 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2544 case PGMPOOLKIND_64BIT_PML4:
2545 case PGMPOOLKIND_32BIT_PD:
2546 case PGMPOOLKIND_PAE_PDPT:
2547#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2548 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2549 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2550 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2551#endif
2552 {
2553 /* find the head */
2554 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2555 {
2556 Assert(pPage->iMonitoredPrev != pPage->idx);
2557 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2558 }
2559 return pPage;
2560 }
2561
2562 /* ignore, no monitoring. */
2563 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2564 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2565 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2566 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2567 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2568 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2569 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2570 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2571 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2572 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2573 case PGMPOOLKIND_ROOT_NESTED:
2574 case PGMPOOLKIND_PAE_PD_PHYS:
2575 case PGMPOOLKIND_PAE_PDPT_PHYS:
2576 case PGMPOOLKIND_32BIT_PD_PHYS:
2577 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2578#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2579 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2580 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2581#endif
2582 break;
2583 default:
2584 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2585 }
2586 }
2587
2588 /* next */
2589 i = pPage->iNext;
2590 } while (i != NIL_PGMPOOL_IDX);
2591 return NULL;
2592}
2593
2594
2595/**
2596 * Enables write monitoring of a guest page.
2597 *
2598 * @returns VBox status code.
2599 * @retval VINF_SUCCESS on success.
2600 * @param pPool The pool.
2601 * @param pPage The cached page.
2602 */
2603static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2604{
2605 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2606
2607 /*
2608 * Filter out the relevant kinds.
2609 */
2610 switch (pPage->enmKind)
2611 {
2612 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2613 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2614 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2615 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2616 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2617 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2618 case PGMPOOLKIND_64BIT_PML4:
2619 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2620 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2621 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2622 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2623 case PGMPOOLKIND_32BIT_PD:
2624 case PGMPOOLKIND_PAE_PDPT:
2625 break;
2626
2627 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2628 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2629 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2630 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2631 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2632 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2633 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2634 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2635 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2636 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2637 case PGMPOOLKIND_ROOT_NESTED:
2638 /* Nothing to monitor here. */
2639 return VINF_SUCCESS;
2640
2641 case PGMPOOLKIND_32BIT_PD_PHYS:
2642 case PGMPOOLKIND_PAE_PDPT_PHYS:
2643 case PGMPOOLKIND_PAE_PD_PHYS:
2644 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2645 /* Nothing to monitor here. */
2646 return VINF_SUCCESS;
2647
2648#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2649 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2650 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2651 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2652 break;
2653
2654 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2655 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2656 /* Nothing to monitor here. */
2657 return VINF_SUCCESS;
2658#endif
2659
2660 default:
2661 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2662 }
2663
2664 /*
2665 * Install handler.
2666 */
2667 int rc;
2668 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2669 if (pPageHead)
2670 {
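        /* Another pool page already monitors this guest page: link this page into its monitoring
           chain instead of registering a second physical access handler for the same range. */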
2671 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2672 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2673
2674#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2675 if (pPageHead->fDirty)
2676 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2677#endif
2678
2679 pPage->iMonitoredPrev = pPageHead->idx;
2680 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2681 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2682 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2683 pPageHead->iMonitoredNext = pPage->idx;
2684 rc = VINF_SUCCESS;
2685 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2686 Log7Func(("Adding to monitoring list GCPhysPage=%RGp\n", pPage->GCPhys));
2687 }
2688 else
2689 {
2690 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2691 Log7Func(("Started monitoring GCPhysPage=%RGp HCPhys=%RHp enmKind=%s\n", pPage->GCPhys, pPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
2692
2693 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2694 PVMCC pVM = pPool->CTX_SUFF(pVM);
2695 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2696 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2697 pPage - &pPool->aPages[0], NIL_RTR3PTR /*pszDesc*/);
2698 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2699 * the heap size should suffice. */
2700 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2701 PVMCPU pVCpu = VMMGetCpu(pVM);
2702 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2703 }
2704 pPage->fMonitored = true;
2705 return rc;
2706}
2707
2708
2709/**
2710 * Disables write monitoring of a guest page.
2711 *
2712 * @returns VBox status code.
2713 * @retval VINF_SUCCESS on success.
2714 * @param pPool The pool.
2715 * @param pPage The cached page.
2716 */
2717static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2718{
2719 /*
2720 * Filter out the relevant kinds.
2721 */
2722 switch (pPage->enmKind)
2723 {
2724 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2725 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2726 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2727 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2728 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2729 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2730 case PGMPOOLKIND_64BIT_PML4:
2731 case PGMPOOLKIND_32BIT_PD:
2732 case PGMPOOLKIND_PAE_PDPT:
2733 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2734 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2735 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2736 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2737 break;
2738
2739 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2740 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2741 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2742 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2743 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2744 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2745 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2746 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2747 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2748 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2749 case PGMPOOLKIND_ROOT_NESTED:
2750 case PGMPOOLKIND_PAE_PD_PHYS:
2751 case PGMPOOLKIND_PAE_PDPT_PHYS:
2752 case PGMPOOLKIND_32BIT_PD_PHYS:
2753 /* Nothing to monitor here. */
2754 Assert(!pPage->fMonitored);
2755 return VINF_SUCCESS;
2756
2757#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2758 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2759 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2760 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2761 break;
2762
2763 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2764 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2765 /* Nothing to monitor here. */
2766 Assert(!pPage->fMonitored);
2767 return VINF_SUCCESS;
2768#endif
2769
2770 default:
2771 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2772 }
2773 Assert(pPage->fMonitored);
2774
2775 /*
2776 * Remove the page from the monitored list or uninstall it if last.
2777 */
2778 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2779 int rc;
2780 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2781 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2782 {
2783 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2784 {
2785 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2786 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2787 rc = PGMHandlerPhysicalChangeUserArg(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, pPage->iMonitoredNext);
2788
2789 AssertFatalRCSuccess(rc);
2790 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2791 }
2792 else
2793 {
2794 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2795 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2796 {
2797 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2798 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2799 }
2800 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2801 rc = VINF_SUCCESS;
2802 }
2803 }
2804 else
2805 {
2806 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2807 AssertFatalRC(rc);
2808 PVMCPU pVCpu = VMMGetCpu(pVM);
2809 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2810 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2811 }
2812 pPage->fMonitored = false;
2813
2814 /*
2815 * Remove it from the list of modified pages (if in it).
2816 */
2817 pgmPoolMonitorModifiedRemove(pPool, pPage);
2818
2819 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2820 Log7Func(("Stopped monitoring %RGp\n", pPage->GCPhys));
2821
2822 return rc;
2823}
2824
2825
2826/**
2827 * Inserts the page into the list of modified pages.
2828 *
2829 * @param pPool The pool.
2830 * @param pPage The page.
2831 */
2832void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2833{
2834 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2835 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2836 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2837 && pPool->iModifiedHead != pPage->idx,
2838 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2839 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2840 pPool->iModifiedHead, pPool->cModifiedPages));
2841
2842 pPage->iModifiedNext = pPool->iModifiedHead;
2843 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2844 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2845 pPool->iModifiedHead = pPage->idx;
2846 pPool->cModifiedPages++;
2847#ifdef VBOX_WITH_STATISTICS
2848 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2849 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2850#endif
2851}
2852
2853
2854/**
2855 * Removes the page from the list of modified pages and resets the
2856 * modification counter.
2857 *
2858 * @param pPool The pool.
2859 * @param pPage The page which is believed to be in the list of modified pages.
2860 */
2861static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2862{
2863 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2864 if (pPool->iModifiedHead == pPage->idx)
2865 {
2866 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2867 pPool->iModifiedHead = pPage->iModifiedNext;
2868 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2869 {
2870 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2871 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2872 }
2873 pPool->cModifiedPages--;
2874 }
2875 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2876 {
2877 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2878 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2879 {
2880 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2881 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2882 }
2883 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2884 pPool->cModifiedPages--;
2885 }
2886 else
2887 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2888 pPage->cModifications = 0;
2889}
2890
2891
2892/**
2893 * Zaps the list of modified pages, resetting their modification counters in the process.
2894 *
2895 * @param pVM The cross context VM structure.
2896 */
2897static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2898{
2899 PGM_LOCK_VOID(pVM);
2900 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2901 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2902
2903 unsigned cPages = 0; NOREF(cPages);
2904
2905#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2906 pgmPoolResetDirtyPages(pVM);
2907#endif
2908
2909 uint16_t idx = pPool->iModifiedHead;
2910 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2911 while (idx != NIL_PGMPOOL_IDX)
2912 {
2913 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2914 idx = pPage->iModifiedNext;
2915 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2916 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2917 pPage->cModifications = 0;
2918 Assert(++cPages);
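        /* Note: cPages is only advanced inside Assert(), so both it and the AssertMsg below compile
           away in release builds; the NOREF(cPages) above keeps the compiler quiet in that case. */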
2919 }
2920 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2921 pPool->cModifiedPages = 0;
2922 PGM_UNLOCK(pVM);
2923}
2924
2925
2926/**
2927 * Handle SyncCR3 pool tasks
2928 *
2929 * @returns VBox status code.
2930 * @retval  VINF_SUCCESS if successful.
2931 * @retval  VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2932 * @param pVCpu The cross context virtual CPU structure.
2933 * @remark Should only be used when monitoring is available, thus placed in
2934 * the PGMPOOL_WITH_MONITORING \#ifdef.
2935 */
2936int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2937{
2938 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2939 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2940
2941 /*
2942 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2943 * Occasionally we will have to clear all the shadow page tables because we wanted
2944 * to monitor a page which was mapped by too many shadowed page tables. This operation
2945 * to monitor a page which was mapped by too many shadowed page tables. This operation
2946 * is sometimes referred to as a 'lightweight flush'.
2947# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2948 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2949 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2950# else /* !IN_RING3 */
2951 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2952 {
2953 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2954 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2955
2956 /* Make sure all other VCPUs return to ring 3. */
2957 if (pVM->cCpus > 1)
2958 {
2959 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2960 PGM_INVL_ALL_VCPU_TLBS(pVM);
2961 }
2962 return VINF_PGM_SYNC_CR3;
2963 }
2964# endif /* !IN_RING3 */
2965 else
2966 {
2967 pgmPoolMonitorModifiedClearAll(pVM);
2968
2969 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2970 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2971 {
2972 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2973 return pgmPoolSyncCR3(pVCpu);
2974 }
2975 }
2976 return VINF_SUCCESS;
2977}
2978
2979
2980/**
2981 * Frees up at least one user entry.
2982 *
2983 * @returns VBox status code.
2984 * @retval  VINF_SUCCESS if successfully freed a user entry.
2985 *
2986 * @param pPool The pool.
2987 * @param iUser The user index.
2988 */
2989static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2990{
2991 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2992 /*
2993 * Just free cached pages in a braindead fashion.
2994 */
2995 /** @todo walk the age list backwards and free the first with usage. */
2996 int rc = VINF_SUCCESS;
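    /* Flushing cached pages is expected to return their user records to the free list, which is why
       the loop below keeps evicting until iUserFreeHead is no longer NIL. */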
2997 do
2998 {
2999 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
3000 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
3001 rc = rc2;
3002 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
3003 return rc;
3004}
3005
3006
3007/**
3008 * Inserts a page into the cache.
3009 *
3010 * This will create user node for the page, insert it into the GCPhys
3011 * hash, and insert it into the age list.
3012 *
3013 * @returns VBox status code.
3014 * @retval VINF_SUCCESS if successfully added.
3015 *
3016 * @param pPool The pool.
3017 * @param pPage The cached page.
3018 * @param GCPhys The GC physical address of the page we're gonna shadow.
3019 * @param iUser The user index.
3020 * @param iUserTable The user table index.
3021 */
3022DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
3023{
3024 int rc = VINF_SUCCESS;
3025 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3026
3027 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
3028
3029 if (iUser != NIL_PGMPOOL_IDX)
3030 {
3031#ifdef VBOX_STRICT
3032 /*
3033         * Check that the entry doesn't already exist.
3034 */
3035 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3036 {
3037 uint16_t i = pPage->iUserHead;
3038 do
3039 {
3040 Assert(i < pPool->cMaxUsers);
3041 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3042 i = paUsers[i].iNext;
3043 } while (i != NIL_PGMPOOL_USER_INDEX);
3044 }
3045#endif
3046
3047 /*
3048         * Find a free user node.
3049 */
3050 uint16_t i = pPool->iUserFreeHead;
3051 if (i == NIL_PGMPOOL_USER_INDEX)
3052 {
3053 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3054 if (RT_FAILURE(rc))
3055 return rc;
3056 i = pPool->iUserFreeHead;
3057 }
3058
3059 /*
3060 * Unlink the user node from the free list,
3061 * initialize and insert it into the user list.
3062 */
3063 pPool->iUserFreeHead = paUsers[i].iNext;
3064 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
3065 paUsers[i].iUser = iUser;
3066 paUsers[i].iUserTable = iUserTable;
3067 pPage->iUserHead = i;
3068 }
3069 else
3070 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3071
3072
3073 /*
3074 * Insert into cache and enable monitoring of the guest page if enabled.
3075 *
3076 * Until we implement caching of all levels, including the CR3 one, we'll
3077 * have to make sure we don't try monitor & cache any recursive reuse of
3078     * a monitored CR3 page. Because all Windows versions are doing this we'll
3079 * have to be able to do combined access monitoring, CR3 + PT and
3080 * PD + PT (guest PAE).
3081 *
3082 * Update:
3083 * We're now cooperating with the CR3 monitor if an uncachable page is found.
3084 */
3085 const bool fCanBeMonitored = true;
3086 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
3087 if (fCanBeMonitored)
3088 {
3089 rc = pgmPoolMonitorInsert(pPool, pPage);
3090 AssertRC(rc);
3091 }
3092 return rc;
3093}
3094
3095
3096/**
3097 * Adds a user reference to a page.
3098 *
3099 * This will move the page to the head of the
3100 * This will move the page to the head of the cache age list.
3101 * @returns VBox status code.
3102 * @retval VINF_SUCCESS if successfully added.
3103 *
3104 * @param pPool The pool.
3105 * @param pPage The cached page.
3106 * @param iUser The user index.
3107 * @param iUserTable The user table.
3108 */
3109static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3110{
3111 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3112 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3113 Assert(iUser != NIL_PGMPOOL_IDX);
3114
3115# ifdef VBOX_STRICT
3116 /*
3117     * Check that the entry doesn't already exist. We only allow multiple
3118 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3119 */
3120 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3121 {
3122 uint16_t i = pPage->iUserHead;
3123 do
3124 {
3125 Assert(i < pPool->cMaxUsers);
3126 /** @todo this assertion looks odd... Shouldn't it be && here? */
3127 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3128 i = paUsers[i].iNext;
3129 } while (i != NIL_PGMPOOL_USER_INDEX);
3130 }
3131# endif
3132
3133 /*
3134 * Allocate a user node.
3135 */
3136 uint16_t i = pPool->iUserFreeHead;
3137 if (i == NIL_PGMPOOL_USER_INDEX)
3138 {
3139 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3140 if (RT_FAILURE(rc))
3141 return rc;
3142 i = pPool->iUserFreeHead;
3143 }
3144 pPool->iUserFreeHead = paUsers[i].iNext;
3145
3146 /*
3147 * Initialize the user node and insert it.
3148 */
3149 paUsers[i].iNext = pPage->iUserHead;
3150 paUsers[i].iUser = iUser;
3151 paUsers[i].iUserTable = iUserTable;
3152 pPage->iUserHead = i;
3153
3154# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3155 if (pPage->fDirty)
3156 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3157# endif
3158
3159 /*
3160 * Tell the cache to update its replacement stats for this page.
3161 */
3162 pgmPoolCacheUsed(pPool, pPage);
3163 return VINF_SUCCESS;
3164}
3165
3166
3167/**
3168 * Frees a user record associated with a page.
3169 *
3170 * This does not clear the entry in the user table, it simply returns the
3171 * user record to the chain of free records.
3172 *
3173 * @param pPool The pool.
3174 * @param pPage The shadow page.
3175 * @param iUser The shadow page pool index of the user table.
3176 * @param iUserTable The index into the user table (shadowed).
3177 *
3178 * @remarks Don't call this for root pages.
3179 */
3180static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3181{
3182 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3183 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3184 Assert(iUser != NIL_PGMPOOL_IDX);
3185
3186 /*
3187 * Unlink and free the specified user entry.
3188 */
3189
3190 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3191 uint16_t i = pPage->iUserHead;
3192 if ( i != NIL_PGMPOOL_USER_INDEX
3193 && paUsers[i].iUser == iUser
3194 && paUsers[i].iUserTable == iUserTable)
3195 {
3196 pPage->iUserHead = paUsers[i].iNext;
3197
3198 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3199 paUsers[i].iNext = pPool->iUserFreeHead;
3200 pPool->iUserFreeHead = i;
3201 return;
3202 }
3203
3204 /* General: Linear search. */
3205 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3206 while (i != NIL_PGMPOOL_USER_INDEX)
3207 {
3208 if ( paUsers[i].iUser == iUser
3209 && paUsers[i].iUserTable == iUserTable)
3210 {
3211 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3212 paUsers[iPrev].iNext = paUsers[i].iNext;
3213 else
3214 pPage->iUserHead = paUsers[i].iNext;
3215
3216 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3217 paUsers[i].iNext = pPool->iUserFreeHead;
3218 pPool->iUserFreeHead = i;
3219 return;
3220 }
3221 iPrev = i;
3222 i = paUsers[i].iNext;
3223 }
3224
3225 /* Fatal: didn't find it */
3226 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3227 iUser, iUserTable, pPage->GCPhys));
3228}
3229
3230
3231#if 0 /* unused */
3232/**
3233 * Gets the entry size of a shadow table.
3234 *
3235 * @param enmKind The kind of page.
3236 *
3237 * @returns The size of the entry in bytes. That is, 4 or 8.
3238 * @returns If the kind is not for a table, an assertion is raised and 0 is
3239 * returned.
3240 */
3241DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3242{
3243 switch (enmKind)
3244 {
3245 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3246 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3247 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3248 case PGMPOOLKIND_32BIT_PD:
3249 case PGMPOOLKIND_32BIT_PD_PHYS:
3250 return 4;
3251
3252 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3253 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3254 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3255 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3256 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3257 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3258 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3259 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3260 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3261 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3262 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3263 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3264 case PGMPOOLKIND_64BIT_PML4:
3265 case PGMPOOLKIND_PAE_PDPT:
3266 case PGMPOOLKIND_ROOT_NESTED:
3267 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3268 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3269 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3270 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3271 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3272 case PGMPOOLKIND_PAE_PD_PHYS:
3273 case PGMPOOLKIND_PAE_PDPT_PHYS:
3274 return 8;
3275
3276 default:
3277 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3278 }
3279}
3280#endif /* unused */
3281
3282#if 0 /* unused */
3283/**
3284 * Gets the entry size of a guest table.
3285 *
3286 * @param enmKind The kind of page.
3287 *
3288 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3289 * @returns If the kind is not for a table, an assertion is raised and 0 is
3290 * returned.
3291 */
3292DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3293{
3294 switch (enmKind)
3295 {
3296 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3297 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3298 case PGMPOOLKIND_32BIT_PD:
3299 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3300 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3301 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3302 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3303 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3304 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3305 return 4;
3306
3307 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3308 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3309 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3310 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3311 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3312 case PGMPOOLKIND_64BIT_PML4:
3313 case PGMPOOLKIND_PAE_PDPT:
3314 return 8;
3315
3316 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3317 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3318 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3319 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3320 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3321 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3322 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3323 case PGMPOOLKIND_ROOT_NESTED:
3324 case PGMPOOLKIND_PAE_PD_PHYS:
3325 case PGMPOOLKIND_PAE_PDPT_PHYS:
3326 case PGMPOOLKIND_32BIT_PD_PHYS:
3327 /** @todo can we return 0? (nobody is calling this...) */
3328 AssertFailed();
3329 return 0;
3330
3331 default:
3332 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3333 }
3334}
3335#endif /* unused */
3336
3337
3338/**
3339 * Checks one shadow page table entry for a mapping of a physical page.
3340 *
3341 * @returns true if any relevant PTEs were kept (merely updated), false if
3342 *          all relevant PTEs were removed.
3343 * @param pVM The cross context VM structure.
3344 * @param pPhysPage The guest page in question.
3345 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3346 * @param iShw The shadow page table.
3347 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3348 */
3349static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3350{
3351 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3352 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3353 bool fRet = false;
3354
3355 /*
3356 * Assert sanity.
3357 */
3358 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3359 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3360 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3361
3362 /*
3363 * Then, clear the actual mappings to the page in the shadow PT.
3364 */
3365 switch (pPage->enmKind)
3366 {
3367 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3368 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3369 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3370 {
3371 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3372 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3373 uint32_t u32AndMask = 0;
3374 uint32_t u32OrMask = 0;
3375
3376 if (!fFlushPTEs)
3377 {
3378 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3379 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3380 {
3381 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3382 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3383 u32OrMask = X86_PTE_RW;
3384 u32AndMask = UINT32_MAX;
3385 fRet = true;
3386 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3387 break;
3388
3389 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3390 u32OrMask = 0;
3391 u32AndMask = ~X86_PTE_RW;
3392 fRet = true;
3393 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3394 break;
3395 default:
3396 /* We will end up here when called with an "ALL" access handler. */
3397 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3398 break;
3399 }
3400 }
3401 else
3402 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3403
3404 /* Update the counter if we're removing references. */
3405 if (!u32AndMask)
3406 {
3407 Assert(pPage->cPresent);
3408 Assert(pPool->cPresent);
3409 pPage->cPresent--;
3410 pPool->cPresent--;
3411 }
3412
3413 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3414 {
3415 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3416 X86PTE Pte;
3417 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3418 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3419 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3420
3421 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3422 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3423 return fRet;
3424 }
3425#ifdef LOG_ENABLED
3426 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3427 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3428 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3429 {
3430 Log(("i=%d cFound=%d\n", i, ++cFound));
3431 }
3432#endif
3433 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3434 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3435 break;
3436 }
3437
3438 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3439 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3440 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3441 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3442 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3443 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3444#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
3445 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
3446# ifdef PGM_WITH_LARGE_PAGES
3447 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
3448# endif
3449#endif
3450 {
3451 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3452 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3453 uint64_t u64OrMask = 0;
3454 uint64_t u64AndMask = 0;
3455
3456 if (!fFlushPTEs)
3457 {
3458 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3459 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3460 {
3461 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3462 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3463 u64OrMask = X86_PTE_RW;
3464 u64AndMask = UINT64_MAX;
3465 fRet = true;
3466 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3467 break;
3468
3469 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3470 u64OrMask = 0;
3471 u64AndMask = ~(uint64_t)X86_PTE_RW;
3472 fRet = true;
3473 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3474 break;
3475
3476 default:
3477 /* We will end up here when called with an "ALL" access handler. */
3478 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3479 break;
3480 }
3481 }
3482 else
3483 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3484
3485 /* Update the counter if we're removing references. */
3486 if (!u64AndMask)
3487 {
3488 Assert(pPage->cPresent);
3489 Assert(pPool->cPresent);
3490 pPage->cPresent--;
3491 pPool->cPresent--;
3492 }
3493
3494 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3495 {
3496 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3497 X86PTEPAE Pte;
3498 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3499 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3500 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3501
3502 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3503 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3504 return fRet;
3505 }
3506#ifdef LOG_ENABLED
3507 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3508 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3509 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3510 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3511 Log(("i=%d cFound=%d\n", i, ++cFound));
3512#endif
3513 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3514 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3515 break;
3516 }
3517
3518#ifdef PGM_WITH_LARGE_PAGES
3519 /* Large page case only. */
3520 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3521 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
3522 {
3523 Assert(pVM->pgm.s.fNestedPaging);
3524
3525 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3526 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3527
3528 Assert( pPage->enmKind != PGMPOOLKIND_EPT_PD_FOR_EPT_PD
3529 || (pPD->a[iPte].u & EPT_E_LEAF));
3530
3531 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3532 {
3533 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3534 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3535 pPD->a[iPte].u = 0;
3536 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3537
3538 /* Update the counter as we're removing references. */
3539 Assert(pPage->cPresent);
3540 Assert(pPool->cPresent);
3541 pPage->cPresent--;
3542 pPool->cPresent--;
3543
3544 return fRet;
3545 }
3546# ifdef LOG_ENABLED
3547 LogRel(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3548 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3549 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3550 LogRel(("i=%d cFound=%d\n", i, ++cFound));
3551# endif
3552 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d enmKind=%d\n", pPage->iFirstPresent, pPage->cPresent, pPage->enmKind));
3553 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3554 break;
3555 }
3556
3557 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3558 case PGMPOOLKIND_PAE_PD_PHYS:
3559 {
3560 Assert(pVM->pgm.s.fNestedPaging);
3561
3562 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3563 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3564
3565 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3566 {
3567 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3568 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3569 pPD->a[iPte].u = 0;
3570 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3571
3572 /* Update the counter as we're removing references. */
3573 Assert(pPage->cPresent);
3574 Assert(pPool->cPresent);
3575 pPage->cPresent--;
3576 pPool->cPresent--;
3577 return fRet;
3578 }
3579# ifdef LOG_ENABLED
3580 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3581 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3582 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3583 Log(("i=%d cFound=%d\n", i, ++cFound));
3584# endif
3585 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3586 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3587 break;
3588 }
3589#endif /* PGM_WITH_LARGE_PAGES */
3590
3591 default:
3592 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3593 }
3594
3595 /* not reached. */
3596#ifndef _MSC_VER
3597 return fRet;
3598#endif
3599}
3600
3601
3602/**
3603 * Scans one shadow page table for mappings of a physical page.
3604 *
3605 * @param pVM The cross context VM structure.
3606 * @param pPhysPage The guest page in question.
3607 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3608 * @param iShw The shadow page table.
3609 */
3610static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3611{
3612 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3613
3614    /* We should only come here when there's only one reference to this physical page. */
3615 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3616
3617 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3618 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3619 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3620 if (!fKeptPTEs)
3621 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3622 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3623}
3624
3625
3626/**
3627 * Flushes a list of shadow page tables mapping the same physical page.
3628 *
3629 * @param pVM The cross context VM structure.
3630 * @param pPhysPage The guest page in question.
3631 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3632 * @param iPhysExt The physical cross reference extent list to flush.
3633 */
3634static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3635{
3636 PGM_LOCK_ASSERT_OWNER(pVM);
3637 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3638 bool fKeepList = false;
3639
3640 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3641 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3642
3643 const uint16_t iPhysExtStart = iPhysExt;
3644 PPGMPOOLPHYSEXT pPhysExt;
3645 do
3646 {
3647 Assert(iPhysExt < pPool->cMaxPhysExts);
3648 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3649 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3650 {
3651 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3652 {
3653 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3654 if (!fKeptPTEs)
3655 {
3656 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3657 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3658 }
3659 else
3660 fKeepList = true;
3661 }
3662 }
3663 /* next */
3664 iPhysExt = pPhysExt->iNext;
3665 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3666
3667 if (!fKeepList)
3668 {
3669 /* insert the list into the free list and clear the ram range entry. */
3670 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3671 pPool->iPhysExtFreeHead = iPhysExtStart;
3672 /* Invalidate the tracking data. */
3673 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3674 }
3675
3676 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3677}
3678
3679
3680/**
3681 * Flushes all shadow page table mappings of the given guest page.
3682 *
3683 * This is typically called when the host page backing the guest one has been
3684 * replaced or when the page protection was changed due to a guest access
3685 * caught by the monitoring.
3686 *
3687 * @returns VBox status code.
3688 * @retval VINF_SUCCESS if all references have been successfully cleared.
3689 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3690 * pool cleaning. FF and sync flags are set.
3691 *
3692 * @param pVM The cross context VM structure.
3693 * @param GCPhysPage GC physical address of the page in question
3694 * @param pPhysPage The guest page in question.
3695 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3696 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3697 * flushed, it is NOT touched if this isn't necessary.
3698 *                      flushed; it is NOT touched if this isn't necessary.
3699 *                      The caller MUST initialize this to @a false.
3700int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3701{
3702 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3703 PGM_LOCK_VOID(pVM);
3704 int rc = VINF_SUCCESS;
3705
3706#ifdef PGM_WITH_LARGE_PAGES
3707 /* Is this page part of a large page? */
3708 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3709 {
3710 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3711 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3712
3713 /* Fetch the large page base. */
3714 PPGMPAGE pLargePage;
3715 if (GCPhysBase != GCPhysPage)
3716 {
3717 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3718 AssertFatal(pLargePage);
3719 }
3720 else
3721 pLargePage = pPhysPage;
3722
3723 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3724
3725 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3726 {
3727 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3728 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3729 pVM->pgm.s.cLargePagesDisabled++;
3730
3731            /* Update the base page as *only* that one has a reference and there's only one PDE to clear. */
3732 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3733
3734 *pfFlushTLBs = true;
3735 PGM_UNLOCK(pVM);
3736 return rc;
3737 }
3738 }
3739#else
3740 NOREF(GCPhysPage);
3741#endif /* PGM_WITH_LARGE_PAGES */
3742
3743 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3744 if (u16)
3745 {
3746 /*
3747 * The zero page is currently screwing up the tracking and we'll
3748 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3749 * is defined, zero pages won't normally be mapped. Some kind of solution
3750 * will be needed for this problem of course, but it will have to wait...
3751 */
3752# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC /* end up guruing after pgmR0PhysAllocateLargePage otherwise. */
3753 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3754 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3755# else
3756 if (PGM_PAGE_IS_BALLOONED(pPhysPage))
3757# endif
3758 rc = VINF_PGM_GCPHYS_ALIASED;
3759 else
3760 {
3761 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3762 {
3763 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3764 pgmPoolTrackFlushGCPhysPT(pVM,
3765 pPhysPage,
3766 fFlushPTEs,
3767 PGMPOOL_TD_GET_IDX(u16));
3768 }
3769 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3770 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3771 else
3772 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3773 *pfFlushTLBs = true;
3774 }
3775 }
3776
3777 if (rc == VINF_PGM_GCPHYS_ALIASED)
3778 {
3779 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3780 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3781 rc = VINF_PGM_SYNC_CR3;
3782 }
3783 PGM_UNLOCK(pVM);
3784 return rc;
3785}
3786
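/*
 * Illustrative usage sketch, not part of the original source.  As the
 * documentation above notes, the caller must pass in a flag initialized to
 * false and is responsible for flushing the shadow TLBs when the function
 * sets it; the flush macro shown here is an assumption about what a typical
 * caller would use:
 *
 *     bool fFlushTLBs = false;
 *     int  rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysPage, pPhysPage,
 *                                        true, &fFlushTLBs);
 *     if (fFlushTLBs)
 *         PGM_INVL_ALL_VCPU_TLBS(pVM);   // assumed TLB invalidation helper
 */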
3787
3788/**
3789 * Scans all shadow page tables for mappings of a physical page.
3790 *
3791 * This may be slow, but it's most likely more efficient than cleaning
3792 * out the entire page pool / cache.
3793 *
3794 * @returns VBox status code.
3795 * @retval VINF_SUCCESS if all references have been successfully cleared.
3796 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3797 * a page pool cleaning.
3798 *
3799 * @param pVM The cross context VM structure.
3800 * @param pPhysPage The guest page in question.
3801 */
3802int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3803{
3804 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3805 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3806 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3807 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3808
3809 /*
3810 * There is a limit to what makes sense.
3811 */
3812 if ( pPool->cPresent > 1024
3813 && pVM->cCpus == 1)
3814 {
3815 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3816 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3817 return VINF_PGM_GCPHYS_ALIASED;
3818 }
3819
3820 /*
3821     * Iterate all the pages until we've encountered all that are in use.
3822     * This is a simple but not quite optimal solution.
3823 */
3824 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3825 unsigned cLeft = pPool->cUsedPages;
3826 unsigned iPage = pPool->cCurPages;
3827 while (--iPage >= PGMPOOL_IDX_FIRST)
3828 {
3829 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3830 if ( pPage->GCPhys != NIL_RTGCPHYS
3831 && pPage->cPresent)
3832 {
3833 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* see if it hits */
3834 switch (pPage->enmKind)
3835 {
3836 /*
3837 * We only care about shadow page tables.
3838 */
3839 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3840 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3841 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3842 {
3843 const uint32_t u32 = (uint32_t)u64;
3844 unsigned cPresent = pPage->cPresent;
3845 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3846 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3847 {
3848 const X86PGUINT uPte = pPT->a[i].u;
3849 if (uPte & X86_PTE_P)
3850 {
3851 if ((uPte & X86_PTE_PG_MASK) == u32)
3852 {
3853 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3854 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3855
3856 /* Update the counter as we're removing references. */
3857 Assert(pPage->cPresent);
3858 Assert(pPool->cPresent);
3859 pPage->cPresent--;
3860 pPool->cPresent--;
3861 }
3862 if (!--cPresent)
3863 break;
3864 }
3865 }
3866 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3867 break;
3868 }
3869
3870 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3871 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3872 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3873 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3874 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3875 {
3876 unsigned cPresent = pPage->cPresent;
3877 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3878 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3879 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3880 {
3881 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3882 {
3883 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3884 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0); /// @todo why not atomic?
3885
3886 /* Update the counter as we're removing references. */
3887 Assert(pPage->cPresent);
3888 Assert(pPool->cPresent);
3889 pPage->cPresent--;
3890 pPool->cPresent--;
3891 }
3892 if (!--cPresent)
3893 break;
3894 }
3895 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3896 break;
3897 }
3898
3899 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3900 {
3901 unsigned cPresent = pPage->cPresent;
3902 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3903 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3904 {
3905 X86PGPAEUINT const uPte = pPT->a[i].u;
3906 if (uPte & EPT_E_READ)
3907 {
3908 if ((uPte & EPT_PTE_PG_MASK) == u64)
3909 {
3910 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3911 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3912
3913 /* Update the counter as we're removing references. */
3914 Assert(pPage->cPresent);
3915 Assert(pPool->cPresent);
3916 pPage->cPresent--;
3917 pPool->cPresent--;
3918 }
3919 if (!--cPresent)
3920 break;
3921 }
3922 }
3923 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3924 break;
3925 }
3926 }
3927
3928 if (!--cLeft)
3929 break;
3930 }
3931 }
3932
3933 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3934 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3935
3936 /*
3937 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3938 */
3939 if (pPool->cPresent > 1024)
3940 {
3941 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3942 return VINF_PGM_GCPHYS_ALIASED;
3943 }
3944
3945 return VINF_SUCCESS;
3946}
3947
3948
3949/**
3950 * Clears the user entry in a user table.
3951 *
3952 * This is used to remove all references to a page when flushing it.
3953 */
3954static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3955{
3956 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3957 Assert(pUser->iUser < pPool->cCurPages);
3958 uint32_t iUserTable = pUser->iUserTable;
3959
3960 /*
3961 * Map the user page. Ignore references made by fictitious pages.
3962 */
3963 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3964 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3965 union
3966 {
3967 uint64_t *pau64;
3968 uint32_t *pau32;
3969 } u;
3970 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3971 {
3972 Assert(!pUserPage->pvPageR3);
3973 return;
3974 }
3975 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3976
3977
3978 /* Safety precaution in case we change the paging for other modes too in the future. */
3979 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3980
3981#ifdef VBOX_STRICT
3982 /*
3983 * Some sanity checks.
3984 */
3985 switch (pUserPage->enmKind)
3986 {
3987 case PGMPOOLKIND_32BIT_PD:
3988 case PGMPOOLKIND_32BIT_PD_PHYS:
3989 Assert(iUserTable < X86_PG_ENTRIES);
3990 break;
3991 case PGMPOOLKIND_PAE_PDPT:
3992 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3993 case PGMPOOLKIND_PAE_PDPT_PHYS:
3994 Assert(iUserTable < 4);
3995 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3996 break;
3997 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3998 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3999 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4000 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4001 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4002 case PGMPOOLKIND_PAE_PD_PHYS:
4003 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4004 break;
4005 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4006 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4007 break;
4008 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4009 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4010 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
4011 break;
4012 case PGMPOOLKIND_64BIT_PML4:
4013 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
4014 /* GCPhys >> PAGE_SHIFT is the index here */
4015 break;
4016 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4017 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4018 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4019 break;
4020
4021 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4022 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4023 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4024 break;
4025
4026 case PGMPOOLKIND_ROOT_NESTED:
4027 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4028 break;
4029
4030# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4031 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4032 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
4033 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4034 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4035 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4036 Assert(iUserTable < EPT_PG_ENTRIES);
4037 break;
4038# endif
4039
4040 default:
4041 AssertMsgFailed(("enmKind=%d GCPhys=%RGp\n", pUserPage->enmKind, pPage->GCPhys));
4042 break;
4043 }
4044#endif /* VBOX_STRICT */
4045
4046 /*
4047 * Clear the entry in the user page.
4048 */
4049 switch (pUserPage->enmKind)
4050 {
4051 /* 32-bit entries */
4052 case PGMPOOLKIND_32BIT_PD:
4053 case PGMPOOLKIND_32BIT_PD_PHYS:
4054 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
4055 break;
4056
4057 /* 64-bit entries */
4058 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4059 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4060 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4061 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4062 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4063 case PGMPOOLKIND_PAE_PD_PHYS:
4064 case PGMPOOLKIND_PAE_PDPT_PHYS:
4065 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4066 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4067 case PGMPOOLKIND_64BIT_PML4:
4068 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4069 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4070 case PGMPOOLKIND_PAE_PDPT:
4071 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4072 case PGMPOOLKIND_ROOT_NESTED:
4073 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4074 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4075# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4076 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4077 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
4078 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4079 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4080 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4081#endif
4082 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
4083 break;
4084
4085 default:
4086 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
4087 }
4088 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
4089}
4090
4091
4092/**
4093 * Clears all users of a page.
4094 */
4095static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4096{
4097 /*
4098 * Free all the user records.
4099 */
4100 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
4101
4102 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4103 uint16_t i = pPage->iUserHead;
4104 while (i != NIL_PGMPOOL_USER_INDEX)
4105 {
4106        /* Clear entry in user table. */
4107 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
4108
4109 /* Free it. */
4110 const uint16_t iNext = paUsers[i].iNext;
4111 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4112 paUsers[i].iNext = pPool->iUserFreeHead;
4113 pPool->iUserFreeHead = i;
4114
4115 /* Next. */
4116 i = iNext;
4117 }
4118 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4119}
4120
4121
4122/**
4123 * Allocates a new physical cross reference extent.
4124 *
4125 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
4126 * @param pVM The cross context VM structure.
4127 * @param piPhysExt Where to store the phys ext index.
4128 */
4129PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
4130{
4131 PGM_LOCK_ASSERT_OWNER(pVM);
4132 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4133 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4134 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4135 {
4136 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4137 return NULL;
4138 }
4139 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4140 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4141 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4142 *piPhysExt = iPhysExt;
4143 return pPhysExt;
4144}
4145
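/*
 * Illustrative sketch, not part of the original source: the extent allocator
 * above is typically paired with the free routines below the way
 * pgmPoolTrackPhysExtAddref does it further down in this file:
 *
 *     uint16_t        iPhysExt;
 *     PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
 *     if (!pPhysExt)   // out of extents: fall back to the overflow marker
 *         return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
 *     pPhysExt->aidx[0] = iShwPT;   // record the referencing shadow PT ...
 *     pPhysExt->apte[0] = iPte;     // ... and the PTE index within it
 *     // ... later, when the last reference is dropped:
 *     pgmPoolTrackPhysExtFree(pVM, iPhysExt);
 */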
4146
4147/**
4148 * Frees a physical cross reference extent.
4149 *
4150 * @param pVM The cross context VM structure.
4151 * @param iPhysExt The extent to free.
4152 */
4153void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
4154{
4155 PGM_LOCK_ASSERT_OWNER(pVM);
4156 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4157 Assert(iPhysExt < pPool->cMaxPhysExts);
4158 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4159 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4160 {
4161 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4162 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4163 }
4164 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4165 pPool->iPhysExtFreeHead = iPhysExt;
4166}
4167
4168
4169/**
4170 * Frees a list of physical cross reference extents.
4171 *
4172 * @param pVM The cross context VM structure.
4173 * @param iPhysExt The index of the first extent in the list to free.
4174 */
4175void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
4176{
4177 PGM_LOCK_ASSERT_OWNER(pVM);
4178 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4179
4180 const uint16_t iPhysExtStart = iPhysExt;
4181 PPGMPOOLPHYSEXT pPhysExt;
4182 do
4183 {
4184 Assert(iPhysExt < pPool->cMaxPhysExts);
4185 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4186 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4187 {
4188 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4189 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4190 }
4191
4192 /* next */
4193 iPhysExt = pPhysExt->iNext;
4194 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4195
4196 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4197 pPool->iPhysExtFreeHead = iPhysExtStart;
4198}
4199
4200
4201/**
4202 * Insert a reference into a list of physical cross reference extents.
4203 *
4204 * @returns The new tracking data for PGMPAGE.
4205 *
4206 * @param pVM The cross context VM structure.
4207 * @param iPhysExt The physical extent index of the list head.
4208 * @param iShwPT The shadow page table index.
4209 * @param iPte Page table entry
4210 *
4211 */
4212static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4213{
4214 PGM_LOCK_ASSERT_OWNER(pVM);
4215 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4216 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4217
4218 /*
4219 * Special common cases.
4220 */
4221 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4222 {
4223 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4224 paPhysExts[iPhysExt].apte[1] = iPte;
4225 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4226 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4227 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4228 }
4229 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4230 {
4231 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4232 paPhysExts[iPhysExt].apte[2] = iPte;
4233 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4234 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4235 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4236 }
4237 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4238
4239 /*
4240 * General treatment.
4241 */
4242 const uint16_t iPhysExtStart = iPhysExt;
4243 unsigned cMax = 15;
4244 for (;;)
4245 {
4246 Assert(iPhysExt < pPool->cMaxPhysExts);
4247 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4248 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4249 {
4250 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4251 paPhysExts[iPhysExt].apte[i] = iPte;
4252 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4253 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4254 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4255 }
4256 if (!--cMax)
4257 {
4258 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
4259 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4260 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4261 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4262 }
4263
4264 /* advance */
4265 iPhysExt = paPhysExts[iPhysExt].iNext;
4266 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4267 break;
4268 }
4269
4270 /*
4271 * Add another extent to the list.
4272 */
4273 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4274 if (!pNew)
4275 {
4276 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4277 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4278 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4279 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4280 }
4281 pNew->iNext = iPhysExtStart;
4282 pNew->aidx[0] = iShwPT;
4283 pNew->apte[0] = iPte;
4284 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4285 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4286}
4287
4288
4289/**
4290 * Add a reference to a guest physical page where extents are in use.
4291 *
4292 * @returns The new tracking data for PGMPAGE.
4293 *
4294 * @param pVM The cross context VM structure.
4295 * @param pPhysPage Pointer to the aPages entry in the ram range.
4296 * @param u16 The ram range flags (top 16-bits).
4297 * @param iShwPT The shadow page table index.
4298 * @param iPte Page table entry
4299 */
4300uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4301{
4302 PGM_LOCK_VOID(pVM);
4303 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4304 {
4305 /*
4306 * Convert to extent list.
4307 */
4308 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4309 uint16_t iPhysExt;
4310 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4311 if (pPhysExt)
4312 {
4313 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4314 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4315 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4316 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4317 pPhysExt->aidx[1] = iShwPT;
4318 pPhysExt->apte[1] = iPte;
4319 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4320 }
4321 else
4322 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4323 }
4324 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4325 {
4326 /*
4327 * Insert into the extent list.
4328 */
4329 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4330 }
4331 else
4332 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4333 PGM_UNLOCK(pVM);
4334 return u16;
4335}
4336
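/*
 * Illustrative note, not part of the original source: the 16-bit tracking
 * word returned above packs a reference count and an index; the exact bit
 * layout lives in PGMInternal.h, only the accessors used in this file are
 * shown:
 *
 *     uint16_t u16   = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt); // pack
 *     unsigned cRefs = PGMPOOL_TD_GET_CREFS(u16);                           // unpack count
 *     unsigned idx   = PGMPOOL_TD_GET_IDX(u16);                             // unpack index
 *
 * When cRefs equals PGMPOOL_TD_CREFS_PHYSEXT the index denotes a physical
 * cross reference extent list rather than a single shadow page table.
 */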
4337
4338/**
4339 * Clear references to guest physical memory.
4340 *
4341 * @param pPool The pool.
4342 * @param pPage The page.
4343 * @param pPhysPage Pointer to the aPages entry in the ram range.
4344 * @param iPte Shadow PTE index
4345 */
4346void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4347{
4348 PVMCC pVM = pPool->CTX_SUFF(pVM);
4349 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4350 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4351
4352 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4353 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4354 {
4355 PGM_LOCK_VOID(pVM);
4356
4357 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4358 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4359 do
4360 {
4361 Assert(iPhysExt < pPool->cMaxPhysExts);
4362
4363 /*
4364 * Look for the shadow page and check if it's all freed.
4365 */
4366 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4367 {
4368 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4369 && paPhysExts[iPhysExt].apte[i] == iPte)
4370 {
4371 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4372 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4373
4374 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4375 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4376 {
4377 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4378 PGM_UNLOCK(pVM);
4379 return;
4380 }
4381
4382 /* we can free the node. */
4383 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4384 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4385 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4386 {
4387 /* lonely node */
4388 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4389 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4390 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4391 }
4392 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4393 {
4394 /* head */
4395 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4396 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4397 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4398 }
4399 else
4400 {
4401 /* in list */
4402 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4403 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4404 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4405 }
4406 iPhysExt = iPhysExtNext;
4407 PGM_UNLOCK(pVM);
4408 return;
4409 }
4410 }
4411
4412 /* next */
4413 iPhysExtPrev = iPhysExt;
4414 iPhysExt = paPhysExts[iPhysExt].iNext;
4415 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4416
4417 PGM_UNLOCK(pVM);
4418 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4419 }
4420 else /* nothing to do */
4421 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4422}
4423
4424/**
4425 * Clear references to guest physical memory.
4426 *
4427 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4428 * physical address is assumed to be correct, so the linear search can be
4429 * skipped and we can assert at an earlier point.
4430 *
4431 * @param pPool The pool.
4432 * @param pPage The page.
4433 * @param HCPhys The host physical address corresponding to the guest page.
4434 * @param GCPhys The guest physical address corresponding to HCPhys.
4435 * @param iPte Shadow PTE index
4436 */
4437static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4438{
4439 /*
4440 * Lookup the page and check if it checks out before derefing it.
4441 */
4442 PVMCC pVM = pPool->CTX_SUFF(pVM);
4443 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4444 if (pPhysPage)
4445 {
4446 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4447#ifdef LOG_ENABLED
4448 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4449 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4450#endif
4451 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4452 {
4453 Assert(pPage->cPresent);
4454 Assert(pPool->cPresent);
4455 pPage->cPresent--;
4456 pPool->cPresent--;
4457 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4458 return;
4459 }
4460
4461 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp iPte=%u fIsNested=%RTbool\n",
4462 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage), iPte, PGMPOOL_PAGE_IS_NESTED(pPage)));
4463 }
4464 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4465}
4466
4467
4468/**
4469 * Clear references to guest physical memory.
4470 *
4471 * @param pPool The pool.
4472 * @param pPage The page.
4473 * @param HCPhys The host physical address corresponding to the guest page.
4474 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4475 * @param iPte Shadow pte index
4476 */
4477void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4478{
4479 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4480
4481 /*
4482 * Try the hint first.
4483 */
4484 RTHCPHYS HCPhysHinted;
4485 PVMCC pVM = pPool->CTX_SUFF(pVM);
4486 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4487 if (pPhysPage)
4488 {
4489 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4490 Assert(HCPhysHinted);
4491 if (HCPhysHinted == HCPhys)
4492 {
4493 Assert(pPage->cPresent);
4494 Assert(pPool->cPresent);
4495 pPage->cPresent--;
4496 pPool->cPresent--;
4497 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4498 return;
4499 }
4500 }
4501 else
4502 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4503
4504 /*
4505 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4506 */
4507 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4508 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4509 while (pRam)
4510 {
4511 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4512 while (iPage-- > 0)
4513 {
4514 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4515 {
4516 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4517 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4518 Assert(pPage->cPresent);
4519 Assert(pPool->cPresent);
4520 pPage->cPresent--;
4521 pPool->cPresent--;
4522 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4523 return;
4524 }
4525 }
4526 pRam = pRam->CTX_SUFF(pNext);
4527 }
4528
4529 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4530}
4531
4532
4533/**
4534 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4535 *
4536 * @param pPool The pool.
4537 * @param pPage The page.
4538 * @param pShwPT The shadow page table (mapping of the page).
4539 * @param pGstPT The guest page table.
4540 */
4541DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4542{
4543 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4544 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4545 {
4546 const X86PGUINT uPte = pShwPT->a[i].u;
4547 Assert(!(uPte & RT_BIT_32(10)));
4548 if (uPte & X86_PTE_P)
4549 {
4550 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4551 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4552 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4553 if (!pPage->cPresent)
4554 break;
4555 }
4556 }
4557}
4558
4559
4560/**
4561 * Clear references to guest physical memory in a PAE / 32-bit page table.
4562 *
4563 * @param pPool The pool.
4564 * @param pPage The page.
4565 * @param pShwPT The shadow page table (mapping of the page).
4566 * @param pGstPT The guest page table (just a half one).
4567 */
4568DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4569{
4570 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4571 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4572 {
4573 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4574 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4575 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4576 {
4577 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4578 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4579 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4580 if (!pPage->cPresent)
4581 break;
4582 }
4583 }
4584}
4585
4586
4587/**
4588 * Clear references to guest physical memory in a PAE / PAE page table.
4589 *
4590 * @param pPool The pool.
4591 * @param pPage The page.
4592 * @param pShwPT The shadow page table (mapping of the page).
4593 * @param pGstPT The guest page table.
4594 */
4595DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4596{
4597 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4598 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4599 {
4600 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4601 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4602 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4603 {
4604            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4605 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4606 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4607 if (!pPage->cPresent)
4608 break;
4609 }
4610 }
4611}
4612
4613
4614/**
4615 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4616 *
4617 * @param pPool The pool.
4618 * @param pPage The page.
4619 * @param pShwPT The shadow page table (mapping of the page).
4620 */
4621DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4622{
4623 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4624 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4625 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4626 {
4627 const X86PGUINT uPte = pShwPT->a[i].u;
4628 Assert(!(uPte & RT_BIT_32(10)));
4629 if (uPte & X86_PTE_P)
4630 {
4631 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4632 i, uPte & X86_PTE_PG_MASK, GCPhys));
4633 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4634 if (!pPage->cPresent)
4635 break;
4636 }
4637 }
4638}
4639
4640
4641/**
4642 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4643 *
4644 * @param pPool The pool.
4645 * @param pPage The page.
4646 * @param pShwPT The shadow page table (mapping of the page).
4647 */
4648DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4649{
4650 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4651 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4652 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4653 {
4654 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4655 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4656 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4657 {
4658 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4659 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4660 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4661 if (!pPage->cPresent)
4662 break;
4663 }
4664 }
4665}
4666
4667
4668/**
4669 * Clear references to shadowed pages in an EPT page table.
4670 *
4671 * @param pPool The pool.
4672 * @param pPage The page.
4673 * @param pShwPT The shadow EPT page table (mapping of the
4674 * page).
4675 */
4676DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4677{
4678 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4679 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4680 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4681 {
4682 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4683 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4684 if (uPte & EPT_E_READ)
4685 {
4686 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4687 i, uPte & EPT_PTE_PG_MASK, pPage->GCPhys));
4688 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4689 if (!pPage->cPresent)
4690 break;
4691 }
4692 }
4693}
4694
4695#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4696
4697/**
4698 * Clears references to shadowed pages in a SLAT EPT page table.
4699 *
4700 * @param pPool The pool.
4701 * @param pPage The page.
4702 * @param pShwPT The shadow page table (mapping of the page).
4703 * @param pGstPT The guest page table.
4704 */
4705DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT, PCEPTPT pGstPT)
4706{
4707 Assert(PGMPOOL_PAGE_IS_NESTED(pPage));
4708 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4709 {
4710 X86PGPAEUINT const uShwPte = pShwPT->a[i].u;
4711 Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
4712 if (uShwPte & EPT_PRESENT_MASK)
4713 {
4714 Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, pGstPT->a[i].u));
4715 pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, pGstPT->a[i].u & EPT_PTE_PG_MASK, i);
4716 if (!pPage->cPresent)
4717 break;
4718 }
4719 }
4720}
4721
4722
4723/**
4724 * Clear references to guest physical memory in a SLAT 2MB EPT page table.
4725 *
4726 * @param pPool The pool.
4727 * @param pPage The page.
4728 * @param pShwPT The shadow page table (mapping of the page).
4729 */
4730DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT2MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4731{
4732 Assert(pPage->fA20Enabled);
4733 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4734 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4735 {
4736 X86PGPAEUINT const uShwPte = pShwPT->a[i].u;
4737 Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
4738 if (uShwPte & EPT_PRESENT_MASK)
4739 {
4740 Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, GCPhys));
4741 pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, GCPhys, i);
4742 if (!pPage->cPresent)
4743 break;
4744 }
4745 }
4746}
4747
4748
4749/**
4750 * Clear references to shadowed pages in a SLAT EPT page directory.
4751 *
4752 * @param pPool The pool.
4753 * @param pPage The page.
4754 * @param pShwPD The shadow page directory (mapping of the page).
4755 * @param pGstPD The guest page directory.
4756 */
4757DECLINLINE(void) pgmPoolTrackDerefNestedPDEpt(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD, PCEPTPD pGstPD)
4758{
4759 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4760 {
4761 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4762#ifdef PGM_WITH_LARGE_PAGES
4763 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4764#else
4765 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4766#endif
4767 if (uPde & EPT_PRESENT_MASK)
4768 {
4769#ifdef PGM_WITH_LARGE_PAGES
4770 if (uPde & EPT_E_LEAF)
4771 {
4772 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n", i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4773 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK, pGstPD->a[i].u & EPT_PDE2M_PG_MASK, i);
4774 }
4775 else
4776#endif
4777 {
4778 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4779 if (pSubPage)
4780 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4781 else
4782 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4783 }
4784 }
4785 }
4786}
4787
4788#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
4789
4790
4791/**
4792 * Clear references to shadowed pages in a 32-bit page directory.
4793 *
4794 * @param pPool The pool.
4795 * @param pPage The page.
4796 * @param pShwPD The shadow page directory (mapping of the page).
4797 */
4798DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4799{
4800 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4801 {
4802 X86PGUINT const uPde = pShwPD->a[i].u;
4803 if (uPde & X86_PDE_P)
4804 {
4805 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4806 if (pSubPage)
4807 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4808 else
4809 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4810 }
4811 }
4812}
4813
4814
4815/**
4816 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4817 *
4818 * @param pPool The pool.
4819 * @param pPage The page.
4820 * @param pShwPD The shadow page directory (mapping of the page).
4821 */
4822DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4823{
4824 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4825 {
4826 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4827 if (uPde & X86_PDE_P)
4828 {
4829#ifdef PGM_WITH_LARGE_PAGES
4830 if (uPde & X86_PDE_PS)
4831 {
4832 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4833 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4834 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4835 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4836 i);
4837 }
4838 else
4839#endif
4840 {
4841 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4842 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4843 if (pSubPage)
4844 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4845 else
4846 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4847 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4848 }
4849 }
4850 }
4851}
4852
4853
4854/**
4855 * Clear references to shadowed pages in a PAE page directory pointer table.
4856 *
4857 * @param pPool The pool.
4858 * @param pPage The page.
4859 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4860 */
4861DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4862{
4863 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4864 {
4865 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4866 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4867 if (uPdpe & X86_PDPE_P)
4868 {
4869 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4870 if (pSubPage)
4871 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4872 else
4873 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4874 }
4875 }
4876}
4877
4878
4879/**
4880 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4881 *
4882 * @param pPool The pool.
4883 * @param pPage The page.
4884 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4885 */
4886DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4887{
4888 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4889 {
4890 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4891 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4892 if (uPdpe & X86_PDPE_P)
4893 {
4894 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4895 if (pSubPage)
4896 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4897 else
4898 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4899 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4900 }
4901 }
4902}
4903
4904
4905/**
4906 * Clear references to shadowed pages in a 64-bit level 4 page table.
4907 *
4908 * @param pPool The pool.
4909 * @param pPage The page.
4910 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4911 */
4912DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4913{
4914 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4915 {
4916 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4917 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4918 if (uPml4e & X86_PML4E_P)
4919 {
4920 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4921 if (pSubPage)
4922 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4923 else
4924 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4925 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4926 }
4927 }
4928}
4929
4930
4931/**
4932 * Clear references to shadowed pages in an EPT page directory.
4933 *
4934 * @param pPool The pool.
4935 * @param pPage The page.
4936 * @param pShwPD The shadow page directory (mapping of the page).
4937 */
4938DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4939{
4940 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4941 {
4942 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4943#ifdef PGM_WITH_LARGE_PAGES
4944 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4945#else
4946 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4947#endif
4948 if (uPde & EPT_E_READ)
4949 {
4950#ifdef PGM_WITH_LARGE_PAGES
4951 if (uPde & EPT_E_LEAF)
4952 {
4953 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4954 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4955 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4956 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4957 i);
4958 }
4959 else
4960#endif
4961 {
4962 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4963 if (pSubPage)
4964 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4965 else
4966 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4967 }
4968 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4969 }
4970 }
4971}
4972
4973
4974/**
4975 * Clear references to shadowed pages in an EPT page directory pointer table.
4976 *
4977 * @param pPool The pool.
4978 * @param pPage The page.
4979 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4980 */
4981DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4982{
4983 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4984 {
4985 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4986 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
4987 if (uPdpe & EPT_E_READ)
4988 {
4989 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
4990 if (pSubPage)
4991 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4992 else
4993 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
4994 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4995 }
4996 }
4997}
4998
4999
5000/**
5001 * Clears all references made by this page.
5002 *
5003 * This includes other shadow pages and GC physical addresses.
5004 *
5005 * @param pPool The pool.
5006 * @param pPage The page.
5007 */
5008static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
5009{
5010 /*
5011 * Map the shadow page and take action according to the page kind.
5012 */
5013 PVMCC pVM = pPool->CTX_SUFF(pVM);
5014 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5015 switch (pPage->enmKind)
5016 {
5017 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5018 {
5019 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5020 void *pvGst;
5021 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5022 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
5023 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5024 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5025 break;
5026 }
5027
5028 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5029 {
5030 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5031 void *pvGst;
5032 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5033 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
5034 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5035 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5036 break;
5037 }
5038
5039 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5040 {
5041 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5042 void *pvGst;
5043 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5044 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
5045 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5046 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5047 break;
5048 }
5049
5050 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
5051 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5052 {
5053 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5054 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
5055 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5056 break;
5057 }
5058
5059 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
5060 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5061 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5062 {
5063 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5064 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
5065 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5066 break;
5067 }
5068
5069 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5070 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5071 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5072 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5073 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5074 case PGMPOOLKIND_PAE_PD_PHYS:
5075 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5076 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5077 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
5078 break;
5079
5080 case PGMPOOLKIND_32BIT_PD_PHYS:
5081 case PGMPOOLKIND_32BIT_PD:
5082 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
5083 break;
5084
5085 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5086 case PGMPOOLKIND_PAE_PDPT:
5087 case PGMPOOLKIND_PAE_PDPT_PHYS:
5088 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
5089 break;
5090
5091 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5092 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5093 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
5094 break;
5095
5096 case PGMPOOLKIND_64BIT_PML4:
5097 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
5098 break;
5099
5100 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5101 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
5102 break;
5103
5104 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5105 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
5106 break;
5107
5108 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5109 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5110 break;
5111
5112#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
5113 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5114 {
5115 void *pvGst;
5116 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5117 pgmPoolTrackDerefNestedPTEPT(pPool, pPage, (PEPTPT)pvShw, (PCEPTPT)pvGst);
5118 break;
5119 }
5120
5121 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
5122 pgmPoolTrackDerefNestedPTEPT2MB(pPool, pPage, (PEPTPT)pvShw);
5123 break;
5124
5125 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5126 {
5127 void *pvGst;
5128 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5129 pgmPoolTrackDerefNestedPDEpt(pPool, pPage, (PEPTPD)pvShw, (PCEPTPD)pvGst);
5130 break;
5131 }
5132
5133 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5134 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5135 break;
5136#endif
5137
5138 default:
5139 AssertFatalMsgFailed(("enmKind=%d GCPhys=%RGp\n", pPage->enmKind, pPage->GCPhys));
5140 }
5141
5142 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
5143 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5144 ASMMemZeroPage(pvShw);
5145 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5146 pPage->fZeroed = true;
5147 Assert(!pPage->cPresent);
5148 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
5149}
5150
5151
5152/**
5153 * Flushes a pool page.
5154 *
5155 * This moves the page to the free list after removing all user references to it.
5156 *
5157 * @returns VBox status code.
5158 * @retval VINF_SUCCESS on success.
5159 * @param pPool The pool.
5160 * @param pPage The shadow page.
5161 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
5162 */
5163int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
5164{
5165 PVMCC pVM = pPool->CTX_SUFF(pVM);
5166 bool fFlushRequired = false;
5167
5168 int rc = VINF_SUCCESS;
5169 STAM_PROFILE_START(&pPool->StatFlushPage, f);
5170 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5171 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5172
5173 if (PGMPOOL_PAGE_IS_NESTED(pPage))
5174 Log7Func(("pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5175 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5176
5177 /*
5178 * Reject any attempts at flushing any of the special root pages (shall
5179 * not happen).
5180 */
5181 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
5182 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
5183 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
5184 VINF_SUCCESS);
5185
5186 PGM_LOCK_VOID(pVM);
5187
5188 /*
5189 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
5190 */
5191 if (pgmPoolIsPageLocked(pPage))
5192 {
5193#if !defined(VBOX_VMM_TARGET_ARMV8)
5194 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
5195 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
5196 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
5197 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
5198 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5199 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
5200 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
5201 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
5202 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
5203 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
5204 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
5205#endif
5206 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
5207 PGM_UNLOCK(pVM);
5208 return VINF_SUCCESS;
5209 }
5210
5211 /*
5212 * Mark the page as being in need of an ASMMemZeroPage().
5213 */
5214 pPage->fZeroed = false;
5215
5216#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5217 if (pPage->fDirty)
5218 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
5219#endif
5220
5221 /* If there are any users of this table, then we *must* issue a TLB flush on all VCPUs. */
5222 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
5223 fFlushRequired = true;
5224
5225 /*
5226 * Clear the page.
5227 */
5228 pgmPoolTrackClearPageUsers(pPool, pPage);
5229 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
5230 pgmPoolTrackDeref(pPool, pPage);
5231 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
5232
5233 /*
5234 * Flush it from the cache.
5235 */
5236 pgmPoolCacheFlushPage(pPool, pPage);
5237
5238 /*
5239 * Deregister the monitoring.
5240 */
5241 if (pPage->fMonitored)
5242 rc = pgmPoolMonitorFlush(pPool, pPage);
5243
5244 /*
5245 * Free the page.
5246 */
5247 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
5248 pPage->iNext = pPool->iFreeHead;
5249 pPool->iFreeHead = pPage->idx;
5250 pPage->enmKind = PGMPOOLKIND_FREE;
5251 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5252 pPage->GCPhys = NIL_RTGCPHYS;
5253 pPage->fReusedFlushPending = false;
5254
5255 pPool->cUsedPages--;
5256
5257 /* Flush the TLBs of all VCPUs if required. */
5258 if ( fFlushRequired
5259 && fFlush)
5260 {
5261 PGM_INVL_ALL_VCPU_TLBS(pVM);
5262 }
5263
5264 PGM_UNLOCK(pVM);
5265 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5266 return rc;
5267}
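
/*
 * Illustrative sketch (assumptions: pVM, pPool and HCPhys are valid and the
 * caller context may take the PGM lock here): a minimal sequence for flushing
 * a shadow page identified by its host physical address, using only helpers
 * defined in this file.
 *
 *     PGM_LOCK_VOID(pVM);
 *     PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
 *     int rc = pgmPoolFlushPage(pPool, pPage, true);   << fFlush=true permits the all-VCPU TLB flush
 *     AssertReleaseRC(rc);
 *     PGM_UNLOCK(pVM);
 */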
5268
5269
5270/**
5271 * Frees a usage of a pool page.
5272 *
5273 * The caller is responsible for updating the user table so that it no longer
5274 * references the shadow page.
5275 *
5276 * @param pPool The pool.
5277 * @param pPage The shadow page.
5278 * @param iUser The shadow page pool index of the user table.
5279 * NIL_PGMPOOL_IDX for root pages.
5280 * @param iUserTable The index into the user table (shadowed). Ignored if
5281 * root page.
5282 */
5283void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5284{
5285 PVMCC pVM = pPool->CTX_SUFF(pVM);
5286
5287 STAM_PROFILE_START(&pPool->StatFree, a);
5288 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5289 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5290 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5291
5292 PGM_LOCK_VOID(pVM);
5293 if (iUser != NIL_PGMPOOL_IDX)
5294 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5295 if (!pPage->fCached)
5296 pgmPoolFlushPage(pPool, pPage);
5297 PGM_UNLOCK(pVM);
5298 STAM_PROFILE_STOP(&pPool->StatFree, a);
5299}
5300
5301
5302/**
5303 * Makes one or more free pages available.
5304 *
5305 * @returns VBox status code.
5306 * @retval VINF_SUCCESS on success.
5307 *
5308 * @param pPool The pool.
5309 * @param enmKind Page table kind.
5310 * @param iUser The user of the page.
5311 */
5312static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5313{
5314 PVMCC pVM = pPool->CTX_SUFF(pVM);
5315 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5316 NOREF(enmKind);
5317
5318 /*
5319 * If the pool isn't fully grown yet, expand it.
5320 */
5321 if (pPool->cCurPages < pPool->cMaxPages)
5322 {
5323 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5324#ifdef IN_RING3
5325 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
5326#else
5327 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
5328#endif
5329 if (RT_FAILURE(rc))
5330 return rc;
5331 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5332 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5333 return VINF_SUCCESS;
5334 }
5335
5336 /*
5337 * Free one cached page.
5338 */
5339 return pgmPoolCacheFreeOne(pPool, iUser);
5340}
5341
5342
5343/**
5344 * Allocates a page from the pool.
5345 *
5346 * This page may actually be a cached page and not in need of any processing
5347 * on the caller's part.
5348 *
5349 * @returns VBox status code.
5350 * @retval VINF_SUCCESS if a NEW page was allocated.
5351 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5352 *
5353 * @param pVM The cross context VM structure.
5354 * @param GCPhys The GC physical address of the page we're going to shadow.
5355 * For 4MB and 2MB PD entries, it's the first address the
5356 * shadow PT is covering.
5357 * @param enmKind The kind of mapping.
5358 * @param enmAccess Access type for the mapping (only relevant for big pages)
5359 * @param fA20Enabled Whether the A20 gate is enabled or not.
5360 * @param iUser The shadow page pool index of the user table. Root
5361 * pages should pass NIL_PGMPOOL_IDX.
5362 * @param iUserTable The index into the user table (shadowed). Ignored for
5363 * root pages (iUser == NIL_PGMPOOL_IDX).
5364 * @param fLockPage Lock the page
5365 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5366 */
5367int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5368 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5369{
5370 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5371 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5372 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5373 *ppPage = NULL;
5374 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5375 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5376 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5377
5378#if defined(VBOX_STRICT) && defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT)
5379 PVMCPUCC pVCpu = VMMGetCpu(pVM);
5380 Assert(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT || PGMPOOL_PAGE_IS_KIND_NESTED(enmKind));
5381#endif
5382
5383 PGM_LOCK_VOID(pVM);
5384
5385 if (pPool->fCacheEnabled)
5386 {
5387 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5388 if (RT_SUCCESS(rc2))
5389 {
5390 if (fLockPage)
5391 pgmPoolLockPage(pPool, *ppPage);
5392 PGM_UNLOCK(pVM);
5393 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5394 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5395 return rc2;
5396 }
5397 }
5398
5399 /*
5400 * Allocate a new one.
5401 */
5402 int rc = VINF_SUCCESS;
5403 uint16_t iNew = pPool->iFreeHead;
5404 if (iNew == NIL_PGMPOOL_IDX)
5405 {
5406 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5407 if (RT_FAILURE(rc))
5408 {
5409 PGM_UNLOCK(pVM);
5410 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5411 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5412 return rc;
5413 }
5414 iNew = pPool->iFreeHead;
5415 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5416 }
5417
5418 /* unlink the free head */
5419 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5420 pPool->iFreeHead = pPage->iNext;
5421 pPage->iNext = NIL_PGMPOOL_IDX;
5422
5423 /*
5424 * Initialize it.
5425 */
5426 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5427 pPage->enmKind = enmKind;
5428 pPage->enmAccess = enmAccess;
5429 pPage->GCPhys = GCPhys;
5430 pPage->fA20Enabled = fA20Enabled;
5431 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5432 pPage->fMonitored = false;
5433 pPage->fCached = false;
5434 pPage->fDirty = false;
5435 pPage->fReusedFlushPending = false;
5436 pPage->cModifications = 0;
5437 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5438 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5439 pPage->cPresent = 0;
5440 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5441 pPage->idxDirtyEntry = 0;
5442 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5443 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5444 pPage->cLastAccessHandler = 0;
5445 pPage->cLocked = 0;
5446# ifdef VBOX_STRICT
5447 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5448# endif
5449
5450 /*
5451 * Insert into the tracking and cache. If this fails, free the page.
5452 */
5453 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5454 if (RT_FAILURE(rc3))
5455 {
5456 pPool->cUsedPages--;
5457 pPage->enmKind = PGMPOOLKIND_FREE;
5458 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5459 pPage->GCPhys = NIL_RTGCPHYS;
5460 pPage->iNext = pPool->iFreeHead;
5461 pPool->iFreeHead = pPage->idx;
5462 PGM_UNLOCK(pVM);
5463 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5464 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5465 return rc3;
5466 }
5467
5468 /*
5469 * Commit the allocation, clear the page and return.
5470 */
5471#ifdef VBOX_WITH_STATISTICS
5472 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5473 pPool->cUsedPagesHigh = pPool->cUsedPages;
5474#endif
5475
5476 if (!pPage->fZeroed)
5477 {
5478 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5479 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5480 ASMMemZeroPage(pv);
5481 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5482 }
5483
5484 *ppPage = pPage;
5485 if (fLockPage)
5486 pgmPoolLockPage(pPool, pPage);
5487 PGM_UNLOCK(pVM);
5488 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5489 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5490 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5491 return rc;
5492}
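
/*
 * Illustrative pseudo-code sketch of the allocation round trip through
 * pgmPoolAlloc above and pgmPoolFreeByPage earlier in this file.  pVM, pPool,
 * GCPhys, iUser and iUserTable are assumed to be supplied by the caller, the
 * kind/access values are just examples taken from this file, and error
 * handling is omitted.  VINF_SUCCESS means a freshly zeroed page the caller
 * must populate; VINF_PGM_CACHED_PAGE means an already populated page from
 * the cache.
 *
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhys, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
 *                           true, iUser, iUserTable, false, &pShwPage);
 *     if (rc == VINF_SUCCESS)
 *     {
 *         ... fill in the new, zeroed shadow page table ...
 *     }
 *     else if (rc == VINF_PGM_CACHED_PAGE)
 *     {
 *         ... nothing to do, the cached contents are valid ...
 *     }
 *     ...
 *     pgmPoolFreeByPage(pPool, pShwPage, iUser, iUserTable);
 */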
5493
5494
5495/**
5496 * Frees a usage of a pool page.
5497 *
5498 * @param pVM The cross context VM structure.
5499 * @param HCPhys The HC physical address of the shadow page.
5500 * @param iUser The shadow page pool index of the user table.
5501 * NIL_PGMPOOL_IDX if root page.
5502 * @param iUserTable The index into the user table (shadowed). Ignored if
5503 * root page.
5504 */
5505void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5506{
5507 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5508 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5509 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5510}
5511
5512
5513/**
5514 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5515 *
5516 * @returns Pointer to the shadow page structure.
5517 * @param pPool The pool.
5518 * @param HCPhys The HC physical address of the shadow page.
5519 */
5520PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5521{
5522 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5523
5524 /*
5525 * Look up the page.
5526 */
5527 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5528
5529 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5530 return pPage;
5531}
5532
5533
5534/**
5535 * Internal worker for finding a page for debugging purposes, no assertions.
5536 *
5537 * @returns Pointer to the shadow page structure. NULL if not found.
5538 * @param pPool The pool.
5539 * @param HCPhys The HC physical address of the shadow page.
5540 */
5541PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5542{
5543 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5544 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5545}
5546
5547
5548/**
5549 * Internal worker for PGM_HCPHYS_2_PTR.
5550 *
5551 * @returns VBox status code.
5552 * @param pVM The cross context VM structure.
5553 * @param HCPhys The HC physical address of the shadow page.
5554 * @param ppv Where to return the address.
5555 */
5556int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5557{
5558 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5559 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5560 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5561 VERR_PGM_POOL_GET_PAGE_FAILED);
5562 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5563 return VINF_SUCCESS;
5564}
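
/*
 * Illustrative sketch: resolving a shadow page host physical address to a
 * host virtual pointer via the worker above.  HCPhys is assumed to belong to
 * a pool page; note that the byte offset within the page is preserved in the
 * returned pointer.
 *
 *     void *pv;
 *     int rc = pgmPoolHCPhys2Ptr(pVM, HCPhys, &pv);
 *     if (RT_SUCCESS(rc))
 *     {
 *         ... pv points at the shadow page data for HCPhys ...
 *     }
 */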
5565
5566#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5567
5568/**
5569 * Flush the specified page if present
5570 *
5571 * @param pVM The cross context VM structure.
5572 * @param GCPhys Guest physical address of the page to flush
5573 */
5574void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5575{
5576 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5577
5578 VM_ASSERT_EMT(pVM);
5579
5580 /*
5581 * Look up the GCPhys in the hash.
5582 */
5583 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5584 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5585 if (i == NIL_PGMPOOL_IDX)
5586 return;
5587
5588 do
5589 {
5590 PPGMPOOLPAGE pPage = &pPool->aPages[i];
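        /* Note: GCPhys is page aligned (masked above), so thanks to unsigned
           wraparound this subtraction only yields a value below PAGE_SIZE when
           pPage->GCPhys lies within the page starting at GCPhys. */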
5591 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5592 {
5593 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* Temporary to see if it hits. Remove later. */
5594 switch (pPage->enmKind)
5595 {
5596 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5597 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5598 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5599 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5600 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5601 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5602 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5603 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5604 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5605 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5606 case PGMPOOLKIND_64BIT_PML4:
5607 case PGMPOOLKIND_32BIT_PD:
5608 case PGMPOOLKIND_PAE_PDPT:
5609 {
5610 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5611# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5612 if (pPage->fDirty)
5613 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5614 else
5615# endif
5616 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5617 Assert(!pgmPoolIsPageLocked(pPage));
5618 pgmPoolMonitorChainFlush(pPool, pPage);
5619 return;
5620 }
5621
5622 /* ignore, no monitoring. */
5623 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5624 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5625 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5626 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5627 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5628 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5629 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5630 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5631 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5632 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5633 case PGMPOOLKIND_ROOT_NESTED:
5634 case PGMPOOLKIND_PAE_PD_PHYS:
5635 case PGMPOOLKIND_PAE_PDPT_PHYS:
5636 case PGMPOOLKIND_32BIT_PD_PHYS:
5637 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5638 break;
5639
5640 default:
5641 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5642 }
5643 }
5644
5645 /* next */
5646 i = pPage->iNext;
5647 } while (i != NIL_PGMPOOL_IDX);
5648 return;
5649}
5650
5651
5652/**
5653 * Reset CPU on hot plugging.
5654 *
5655 * @param pVM The cross context VM structure.
5656 * @param pVCpu The cross context virtual CPU structure.
5657 */
5658void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5659{
5660 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5661
5662 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5663 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5664 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5665}
5666
5667
5668/**
5669 * Flushes the entire cache.
5670 *
5671 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5672 * this and will execute this CR3 flush.
5673 *
5674 * @param pVM The cross context VM structure.
5675 */
5676void pgmR3PoolReset(PVM pVM)
5677{
5678 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5679
5680 PGM_LOCK_ASSERT_OWNER(pVM);
5681 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5682 LogFlow(("pgmR3PoolReset:\n"));
5683
5684 /*
5685 * If there are no pages in the pool, there is nothing to do.
5686 */
5687 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5688 {
5689 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5690 return;
5691 }
5692
5693 /*
5694 * Exit the shadow mode since we're going to clear everything,
5695 * including the root page.
5696 */
5697 VMCC_FOR_EACH_VMCPU(pVM)
5698 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5699 VMCC_FOR_EACH_VMCPU_END(pVM);
5700
5701
5702 /*
5703 * Nuke the free list and reinsert all pages into it.
5704 */
5705 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5706 {
5707 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5708
5709 if (pPage->fMonitored)
5710 pgmPoolMonitorFlush(pPool, pPage);
5711 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5712 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5713 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5714 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5715 pPage->GCPhys = NIL_RTGCPHYS;
5716 pPage->enmKind = PGMPOOLKIND_FREE;
5717 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5718 Assert(pPage->idx == i);
5719 pPage->iNext = i + 1;
5720 pPage->fA20Enabled = true;
5721 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5722 pPage->fSeenNonGlobal = false;
5723 pPage->fMonitored = false;
5724 pPage->fDirty = false;
5725 pPage->fCached = false;
5726 pPage->fReusedFlushPending = false;
5727 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5728 pPage->cPresent = 0;
5729 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5730 pPage->cModifications = 0;
5731 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5732 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5733 pPage->idxDirtyEntry = 0;
5734 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5735 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5736 pPage->cLastAccessHandler = 0;
5737 pPage->cLocked = 0;
5738# ifdef VBOX_STRICT
5739 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5740# endif
5741 }
5742 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5743 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5744 pPool->cUsedPages = 0;
5745
5746 /*
5747 * Zap and reinitialize the user records.
5748 */
5749 pPool->cPresent = 0;
5750 pPool->iUserFreeHead = 0;
5751 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5752 const unsigned cMaxUsers = pPool->cMaxUsers;
5753 for (unsigned i = 0; i < cMaxUsers; i++)
5754 {
5755 paUsers[i].iNext = i + 1;
5756 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5757 paUsers[i].iUserTable = 0xfffffffe;
5758 }
5759 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5760
5761 /*
5762 * Clear all the GCPhys links and rebuild the phys ext free list.
5763 */
5764 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5765 pRam;
5766 pRam = pRam->CTX_SUFF(pNext))
5767 {
5768 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5769 while (iPage-- > 0)
5770 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5771 }
5772
5773 pPool->iPhysExtFreeHead = 0;
5774 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5775 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5776 for (unsigned i = 0; i < cMaxPhysExts; i++)
5777 {
5778 paPhysExts[i].iNext = i + 1;
5779 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5780 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5781 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5782 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5783 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5784 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5785 }
5786 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5787
5788 /*
5789 * Just zap the modified list.
5790 */
5791 pPool->cModifiedPages = 0;
5792 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5793
5794 /*
5795 * Clear the GCPhys hash and the age list.
5796 */
5797 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5798 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5799 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5800 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5801
5802# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5803 /* Clear all dirty pages. */
5804 pPool->idxFreeDirtyPage = 0;
5805 pPool->cDirtyPages = 0;
5806 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5807 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5808# endif
5809
5810 /*
5811 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5812 */
5813 VMCC_FOR_EACH_VMCPU(pVM)
5814 {
5815 /*
5816 * Re-enter the shadowing mode and assert Sync CR3 FF.
5817 */
5818 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5819 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5820 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5821 }
5822 VMCC_FOR_EACH_VMCPU_END(pVM);
5823
5824 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5825}
5826
5827#endif /* IN_RING3 */
5828
5829#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5830/**
5831 * Stringifies a PGMPOOLKIND value.
5832 */
5833static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5834{
5835 switch ((PGMPOOLKIND)enmKind)
5836 {
5837 case PGMPOOLKIND_INVALID:
5838 return "PGMPOOLKIND_INVALID";
5839 case PGMPOOLKIND_FREE:
5840 return "PGMPOOLKIND_FREE";
5841 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5842 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5843 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5844 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5845 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5846 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5847 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5848 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5849 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5850 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5851 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5852 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5853 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5854 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5855 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5856 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5857 case PGMPOOLKIND_32BIT_PD:
5858 return "PGMPOOLKIND_32BIT_PD";
5859 case PGMPOOLKIND_32BIT_PD_PHYS:
5860 return "PGMPOOLKIND_32BIT_PD_PHYS";
5861 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5862 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5863 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5864 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5865 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5866 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5867 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5868 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5869 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5870 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5871 case PGMPOOLKIND_PAE_PD_PHYS:
5872 return "PGMPOOLKIND_PAE_PD_PHYS";
5873 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5874 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5875 case PGMPOOLKIND_PAE_PDPT:
5876 return "PGMPOOLKIND_PAE_PDPT";
5877 case PGMPOOLKIND_PAE_PDPT_PHYS:
5878 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5879 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5880 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5881 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5882 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5883 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5884 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5885 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5886 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5887 case PGMPOOLKIND_64BIT_PML4:
5888 return "PGMPOOLKIND_64BIT_PML4";
5889 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5890 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5891 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5892 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5893 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5894 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5895 case PGMPOOLKIND_ROOT_NESTED:
5896 return "PGMPOOLKIND_ROOT_NESTED";
5897 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5898 return "PGMPOOLKIND_EPT_PT_FOR_EPT_PT";
5899 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
5900 return "PGMPOOLKIND_EPT_PT_FOR_EPT_2MB";
5901 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5902 return "PGMPOOLKIND_EPT_PD_FOR_EPT_PD";
5903 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5904 return "PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT";
5905 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
5906 return "PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4";
5907 }
5908 return "Unknown kind!";
5909}
5910#endif /* LOG_ENABLED || VBOX_STRICT */
5911