VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@104939

Last change on this file since 104939 was 104840, checked in by vboxsync on 2024-06-05

VMM/PGM: Refactored RAM ranges, MMIO2 ranges and ROM ranges and added MMIO ranges (to PGM) so we can safely access RAM ranges at runtime w/o fear of them ever being freed up. It is now only possible to create these during VM creation and loading, and they will live till VM destruction (except for MMIO2 which could be destroyed during loading (PCNet fun)). The lookup handling is by table instead of pointer tree. No more ring-0 pointers in shared data. bugref:10687 bugref:10093

1/* $Id: PGMAllPool.cpp 104840 2024-06-05 00:59:51Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PGM_POOL
33#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
34#include <VBox/vmm/pgm.h>
35#include <VBox/vmm/mm.h>
36#include <VBox/vmm/em.h>
37#include <VBox/vmm/cpum.h>
38#include "PGMInternal.h"
39#include <VBox/vmm/vmcc.h>
40#include "PGMInline.h"
41#include <VBox/vmm/hm_vmx.h>
42
43#include <VBox/log.h>
44#include <VBox/err.h>
45#include <iprt/asm.h>
46#include <iprt/asm-mem.h>
47#include <iprt/string.h>
48
49
50/*********************************************************************************************************************************
51* Internal Functions *
52*********************************************************************************************************************************/
53RT_C_DECLS_BEGIN
54#if 0 /* unused */
55DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
56DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
57#endif /* unused */
58static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
59static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
60static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
63static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
64#endif
65#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
66static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
67#endif
68
69int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
70PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
71void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
72void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);
73
74RT_C_DECLS_END
75
76
77#if 0 /* unused */
78/**
79 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
80 *
81 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
82 * @param enmKind The page kind.
83 */
84DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
85{
86 switch (enmKind)
87 {
88 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
89 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
90 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
91 return true;
92 default:
93 return false;
94 }
95}
96#endif /* unused */
97
98
99/**
100 * Flushes a chain of pages sharing the same access monitor.
101 *
102 * @param pPool The pool.
103 * @param pPage A page in the chain.
104 */
105void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
106{
107 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
108
109 /*
110 * Find the list head.
111 */
112 uint16_t idx = pPage->idx;
113 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
114 {
115 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
116 {
117 idx = pPage->iMonitoredPrev;
118 Assert(idx != pPage->idx);
119 pPage = &pPool->aPages[idx];
120 }
121 }
122
123 /*
124 * Iterate the list flushing each shadow page.
125 */
126 for (;;)
127 {
128 idx = pPage->iMonitoredNext;
129 Assert(idx != pPage->idx);
130 if (pPage->idx >= PGMPOOL_IDX_FIRST)
131 {
132 int rc2 = pgmPoolFlushPage(pPool, pPage);
133 AssertRC(rc2);
134 }
135 /* next */
136 if (idx == NIL_PGMPOOL_IDX)
137 break;
138 pPage = &pPool->aPages[idx];
139 }
140}
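/*
 * Note: monitored pages form a doubly linked list through the iMonitoredPrev/iMonitoredNext
 * indices into pPool->aPages; the flush above therefore rewinds to the list head first and
 * then walks forward, flushing every page in the chain while skipping entries below
 * PGMPOOL_IDX_FIRST.
 */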
141
142
143/**
144 * Wrapper for reading the guest entry being modified, using the current context mapping when available.
145 *
146 * @returns VBox status code suitable for scheduling.
147 * @param pVM The cross context VM structure.
148 * @param pvDst Destination address
149 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
150 * on the context (e.g. \#PF in R0 & RC).
151 * @param GCPhysSrc The source guest physical address.
152 * @param cb Size of data to read
153 */
154DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
155{
156#if defined(IN_RING3)
157 NOREF(pVM); NOREF(GCPhysSrc);
158 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
159 return VINF_SUCCESS;
160#else
161 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
162 NOREF(pvSrc);
163 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
164#endif
165}
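/*
 * Note: the '& ~(cb - 1)' masking above aligns the source address down to the read size
 * before copying, which assumes cb is a power of two (e.g. 4 or 8 bytes for PTE/PDE sized
 * entries).
 */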
166
167
168/**
169 * Process shadow entries before they are changed by the guest.
170 *
171 * For PT entries we will clear them. For PD entries, we'll simply check
172 * for mapping conflicts and set the SyncCR3 FF if found.
173 *
174 * @param pVCpu The cross context virtual CPU structure.
175 * @param pPool The pool.
176 * @param pPage The head page.
177 * @param GCPhysFault The guest physical fault address.
178 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
179 * depending on the context (e.g. \#PF in R0 & RC).
180 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
181 */
182static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
183 void const *pvAddress, unsigned cbWrite)
184{
185 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
186 const unsigned off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
187 PVMCC pVM = pPool->CTX_SUFF(pVM);
188 NOREF(pVCpu);
189
190 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
191 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
192
193 if (PGMPOOL_PAGE_IS_NESTED(pPage))
194 Log7Func(("%RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
195
196 for (;;)
197 {
198 union
199 {
200 void *pv;
201 PX86PT pPT;
202 PPGMSHWPTPAE pPTPae;
203 PX86PD pPD;
204 PX86PDPAE pPDPae;
205 PX86PDPT pPDPT;
206 PX86PML4 pPML4;
207#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
208 PEPTPDPT pEptPdpt;
209 PEPTPD pEptPd;
210 PEPTPT pEptPt;
211#endif
212 } uShw;
213
214 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
215 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
216
217 uShw.pv = NULL;
218 switch (pPage->enmKind)
219 {
220 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
221 {
222 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
223 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
224 const unsigned iShw = off / sizeof(X86PTE);
225 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
226 X86PGUINT const uPde = uShw.pPT->a[iShw].u;
227 if (uPde & X86_PTE_P)
228 {
229 X86PTE GstPte;
230 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
231 AssertRC(rc);
232 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
233 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
234 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
235 }
236 break;
237 }
238
239 /* page/2 sized */
240 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
241 {
242 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
243 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
244 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
245 {
246 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
247 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
248 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
249 {
250 X86PTE GstPte;
251 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
252 AssertRC(rc);
253
254 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PG_MASK));
255 pgmPoolTracDerefGCPhysHint(pPool, pPage,
256 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
257 GstPte.u & X86_PTE_PG_MASK,
258 iShw);
259 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
260 }
261 }
262 break;
263 }
264
265 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
266 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
267 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
268 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
269 {
270 unsigned iGst = off / sizeof(X86PDE);
271 unsigned iShwPdpt = iGst / 256;
272 unsigned iShw = (iGst % 256) * 2;
273 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
274
275 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
276 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
277 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
278 {
279 for (unsigned i = 0; i < 2; i++)
280 {
281 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
282 if (uPde & X86_PDE_P)
283 {
284 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
285 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
286 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
287 }
288
289 /* paranoia / a bit assumptive. */
290 if ( (off & 3)
291 && (off & 3) + cbWrite > 4)
292 {
293 const unsigned iShw2 = iShw + 2 + i;
294 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
295 {
296 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
297 if (uPde2 & X86_PDE_P)
298 {
299 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
300 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
301 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
302 }
303 }
304 }
305 }
306 }
307 break;
308 }
309
310 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
311 {
312 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
313 const unsigned iShw = off / sizeof(X86PTEPAE);
314 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
315 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
316 {
317 X86PTEPAE GstPte;
318 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
319 AssertRC(rc);
320
321 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
322 pgmPoolTracDerefGCPhysHint(pPool, pPage,
323 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
324 GstPte.u & X86_PTE_PAE_PG_MASK,
325 iShw);
326 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
327 }
328
329 /* paranoia / a bit assumptive. */
330 if ( (off & 7)
331 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
332 {
333 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
334 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
335
336 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
337 {
338 X86PTEPAE GstPte;
339 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
340 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
341 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
342 AssertRC(rc);
343 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
344 pgmPoolTracDerefGCPhysHint(pPool, pPage,
345 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
346 GstPte.u & X86_PTE_PAE_PG_MASK,
347 iShw2);
348 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
349 }
350 }
351 break;
352 }
353
354 case PGMPOOLKIND_32BIT_PD:
355 {
356 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
357 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
358
359 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
360 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
361 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
362 if (uPde & X86_PDE_P)
363 {
364 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
365 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
366 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
367 }
368
369 /* paranoia / a bit assumptive. */
370 if ( (off & 3)
371 && (off & 3) + cbWrite > sizeof(X86PTE))
372 {
373 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
374 if ( iShw2 != iShw
375 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
376 {
377 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
378 if (uPde2 & X86_PDE_P)
379 {
380 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
381 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
382 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
383 }
384 }
385 }
386#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
387 if ( uShw.pPD->a[iShw].n.u1Present
388 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
389 {
390 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
391 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
392 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
393 }
394#endif
395 break;
396 }
397
398 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
399 {
400 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
401 const unsigned iShw = off / sizeof(X86PDEPAE);
402 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
403
404 /*
405 * Causes trouble when the guest uses a PDE to refer to the whole page table level
406 * structure. (Invalidate here; faults later on when it tries to change the page
407 * table entries -> recheck; probably only applies to the RC case.)
408 */
409 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
410 if (uPde & X86_PDE_P)
411 {
412 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
413 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
414 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
415 }
416
417 /* paranoia / a bit assumptive. */
418 if ( (off & 7)
419 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
420 {
421 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
422 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
423
424 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
425 if (uPde2 & X86_PDE_P)
426 {
427 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
428 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
429 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
430 }
431 }
432 break;
433 }
434
435 case PGMPOOLKIND_PAE_PDPT:
436 {
437 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
438 /*
439 * Hopefully this doesn't happen very often:
440 * - touching unused parts of the page
441 * - messing with the bits of pd pointers without changing the physical address
442 */
443 /* PDPT roots are not page aligned; 32 byte only! */
444 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
445
446 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
447 const unsigned iShw = offPdpt / sizeof(X86PDPE);
448 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
449 {
450 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
451 if (uPdpe & X86_PDPE_P)
452 {
453 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
454 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
455 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
456 }
457
458 /* paranoia / a bit assumptive. */
459 if ( (offPdpt & 7)
460 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
461 {
462 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
463 if ( iShw2 != iShw
464 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
465 {
466 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
467 if (uPdpe2 & X86_PDPE_P)
468 {
469 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
470 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
471 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
472 }
473 }
474 }
475 }
476 break;
477 }
478
479 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
480 {
481 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
482 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
483 const unsigned iShw = off / sizeof(X86PDEPAE);
484 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
485 if (uPde & X86_PDE_P)
486 {
487 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
488 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
489 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
490 }
491
492 /* paranoia / a bit assumptive. */
493 if ( (off & 7)
494 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
495 {
496 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
497 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
498 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
499 if (uPde2 & X86_PDE_P)
500 {
501 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
502 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
503 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
504 }
505 }
506 break;
507 }
508
509 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
510 {
511 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
512 /*
513 * Hopefully this doesn't happen very often:
514 * - messing with the bits of pd pointers without changing the physical address
515 */
516 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
517 const unsigned iShw = off / sizeof(X86PDPE);
518 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
519 if (uPdpe & X86_PDPE_P)
520 {
521 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
522 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
523 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
524 }
525 /* paranoia / a bit assumptive. */
526 if ( (off & 7)
527 && (off & 7) + cbWrite > sizeof(X86PDPE))
528 {
529 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
530 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
531 if (uPdpe2 & X86_PDPE_P)
532 {
533 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
534 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
535 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
536 }
537 }
538 break;
539 }
540
541 case PGMPOOLKIND_64BIT_PML4:
542 {
543 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
544 /*
545 * Hopefully this doesn't happen very often:
546 * - messing with the bits of pd pointers without changing the physical address
547 */
548 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
549 const unsigned iShw = off / sizeof(X86PDPE);
550 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
551 if (uPml4e & X86_PML4E_P)
552 {
553 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
554 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
555 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
556 }
557 /* paranoia / a bit assumptive. */
558 if ( (off & 7)
559 && (off & 7) + cbWrite > sizeof(X86PDPE))
560 {
561 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
562 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
563 if (uPml4e2 & X86_PML4E_P)
564 {
565 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
566 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
567 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
568 }
569 }
570 break;
571 }
572
573#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
574 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
575 {
576 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
577 const unsigned iShw = off / sizeof(EPTPML4E);
578 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
579 if (uPml4e & EPT_PRESENT_MASK)
580 {
581 Log7Func(("PML4 iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPml4e, pPage->GCPhys));
582 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
583 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
584 }
585
586 /* paranoia / a bit assumptive. */
587 if ( (off & 7)
588 && (off & 7) + cbWrite > sizeof(X86PML4E))
589 {
590 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
591 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
592 if (uPml4e2 & EPT_PRESENT_MASK)
593 {
594 Log7Func(("PML4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
595 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
596 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
597 }
598 }
599 break;
600 }
601
602 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
603 {
604 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
605 const unsigned iShw = off / sizeof(EPTPDPTE);
606 X86PGPAEUINT const uPdpte = uShw.pEptPdpt->a[iShw].u;
607 if (uPdpte & EPT_PRESENT_MASK)
608 {
609 Log7Func(("EPT PDPT iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPdpte, pPage->GCPhys));
610 pgmPoolFree(pVM, uPdpte & EPT_PDPTE_PG_MASK, pPage->idx, iShw);
611 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw].u, 0);
612 }
613
614 /* paranoia / a bit assumptive. */
615 if ( (off & 7)
616 && (off & 7) + cbWrite > sizeof(EPTPDPTE))
617 {
618 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDPTE);
619 X86PGPAEUINT const uPdpte2 = uShw.pEptPdpt->a[iShw2].u;
620 if (uPdpte2 & EPT_PRESENT_MASK)
621 {
622 Log7Func(("EPT PDPT iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpte2));
623 pgmPoolFree(pVM, uPdpte2 & EPT_PDPTE_PG_MASK, pPage->idx, iShw2);
624 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw2].u, 0);
625 }
626 }
627 break;
628 }
629
630 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
631 {
632 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
633 const unsigned iShw = off / sizeof(EPTPDE);
634 X86PGPAEUINT const uPde = uShw.pEptPd->a[iShw].u;
635 if (uPde & EPT_PRESENT_MASK)
636 {
637 Assert(!(uPde & EPT_E_LEAF));
638 Log7Func(("EPT PD iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPde, pPage->GCPhys));
639 pgmPoolFree(pVM, uPde & EPT_PDE_PG_MASK, pPage->idx, iShw);
640 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw].u, 0);
641 }
642
643 /* paranoia / a bit assumptive. */
644 if ( (off & 7)
645 && (off & 7) + cbWrite > sizeof(EPTPDE))
646 {
647 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDE);
648 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPd->a));
649 X86PGPAEUINT const uPde2 = uShw.pEptPd->a[iShw2].u;
650 if (uPde2 & EPT_PRESENT_MASK)
651 {
652 Assert(!(uPde2 & EPT_E_LEAF));
653 Log7Func(("EPT PD (2): iShw2=%#x: %RX64 (%RGp) -> freeing it!\n", iShw2, uPde2, pPage->GCPhys));
654 pgmPoolFree(pVM, uPde2 & EPT_PDE_PG_MASK, pPage->idx, iShw2);
655 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw2].u, 0);
656 }
657 }
658 break;
659 }
660
661 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
662 {
663 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
664 const unsigned iShw = off / sizeof(EPTPTE);
665 X86PGPAEUINT const uPte = uShw.pEptPt->a[iShw].u;
666 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
667 if (uPte & EPT_PRESENT_MASK)
668 {
669 EPTPTE GstPte;
670 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
671 AssertRC(rc);
672
673 Log7Func(("EPT PT: iShw=%#x %RX64 (%RGp)\n", iShw, uPte, pPage->GCPhys));
674 pgmPoolTracDerefGCPhysHint(pPool, pPage,
675 uShw.pEptPt->a[iShw].u & EPT_PTE_PG_MASK,
676 GstPte.u & EPT_PTE_PG_MASK,
677 iShw);
678 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw].u, 0);
679 }
680
681 /* paranoia / a bit assumptive. */
682 if ( (off & 7)
683 && (off & 7) + cbWrite > sizeof(EPTPTE))
684 {
685 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPTE);
686 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPt->a));
687 X86PGPAEUINT const uPte2 = uShw.pEptPt->a[iShw2].u;
688 if (uPte2 & EPT_PRESENT_MASK)
689 {
690 EPTPTE GstPte;
691 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
692 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
693 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
694 AssertRC(rc);
695 Log7Func(("EPT PT (2): iShw=%#x %RX64 (%RGp)\n", iShw2, uPte2, pPage->GCPhys));
696 pgmPoolTracDerefGCPhysHint(pPool, pPage,
697 uShw.pEptPt->a[iShw2].u & EPT_PTE_PG_MASK,
698 GstPte.u & EPT_PTE_PG_MASK,
699 iShw2);
700 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw2].u, 0);
701 }
702 }
703 break;
704 }
705#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
706
707 default:
708 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
709 }
710 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
711
712 /* next */
713 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
714 return;
715 pPage = &pPool->aPages[pPage->iMonitoredNext];
716 }
717}
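/*
 * Note: the recurring "paranoia / a bit assumptive" blocks above cover writes that straddle
 * two shadow entries: when the write is unaligned and long enough to spill over, iShw2 is
 * derived from the last byte written ((off + cbWrite - 1) / entry size) and that second
 * entry is cleared or freed as well, so a straddling guest update cannot leave a stale
 * neighbouring entry behind.
 */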
718
719#ifndef IN_RING3
720
721/**
722 * Checks if an access could be a fork operation in progress.
723 *
724 * Meaning that the guest is setting up the parent process for Copy-On-Write.
725 *
726 * @returns true if it's likely that we're forking, otherwise false.
727 * @param pPool The pool.
728 * @param pDis The disassembled instruction.
729 * @param offFault The access offset.
730 */
731DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISSTATE pDis, unsigned offFault)
732{
733 /*
734 * i386 linux is using btr to clear X86_PTE_RW.
735 * The functions involved are (2.6.16 source inspection):
736 * clear_bit
737 * ptep_set_wrprotect
738 * copy_one_pte
739 * copy_pte_range
740 * copy_pmd_range
741 * copy_pud_range
742 * copy_page_range
743 * dup_mmap
744 * dup_mm
745 * copy_mm
746 * copy_process
747 * do_fork
748 */
749 if ( pDis->pCurInstr->uOpcode == OP_BTR
750 && !(offFault & 4)
751 /** @todo Validate that the bit index is X86_PTE_RW. */
752 )
753 {
754 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
755 return true;
756 }
757 return false;
758}
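/*
 * Illustrative sketch (not VirtualBox code): the guest pattern detected above is roughly the
 * i386 Linux write-protect step performed during fork, e.g.
 *
 *     static inline void ptep_set_wrprotect(pte_t *ptep)
 *     {
 *         clear_bit(_PAGE_BIT_RW, &ptep->pte_low);  // ends up as a (locked) btr instruction
 *     }
 *
 * The RW bit lives in the low dword of a PAE entry, which appears to be what the
 * !(offFault & 4) check is screening for; the kernel details are assumptions taken from the
 * call chain in the comment above, not something this code verifies.
 */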
759
760
761/**
762 * Determine whether the page is likely to have been reused.
763 *
764 * @returns true if we consider the page as being reused for a different purpose.
765 * @returns false if we consider it to still be a paging page.
766 * @param pVM The cross context VM structure.
767 * @param pVCpu The cross context virtual CPU structure.
768 * @param pCtx Pointer to the register context for the CPU.
769 * @param pDis The disassembly info for the faulting instruction.
770 * @param pvFault The fault address.
771 * @param pPage The pool page being accessed.
772 *
773 * @remark The REP prefix check is left to the caller because of STOSD/W.
774 */
775DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTX pCtx, PDISSTATE pDis, RTGCPTR pvFault,
776 PPGMPOOLPAGE pPage)
777{
778 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
779 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
780 if (pPage->cLocked)
781 {
782 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused, because it's locked!\n", pvFault, pPage));
783 return false;
784 }
785
786 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
787 if ( HMHasPendingIrq(pVM)
788 && pCtx->rsp - pvFault < 32)
789 {
790 /* Fault caused by stack writes while trying to inject an interrupt event. */
791 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pCtx->rsp));
792 return true;
793 }
794
795 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pCtx->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.x86.Base.idxGenReg));
796
797 /* Non-supervisor mode write means it's used for something else. */
798 if (CPUMGetGuestCPL(pVCpu) == 3)
799 return true;
800
801 switch (pDis->pCurInstr->uOpcode)
802 {
803 /* call implies the actual push of the return address faulted */
804 case OP_CALL:
805 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
806 return true;
807 case OP_PUSH:
808 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
809 return true;
810 case OP_PUSHF:
811 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
812 return true;
813 case OP_PUSHA:
814 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
815 return true;
816 case OP_FXSAVE:
817 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
818 return true;
819 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
820 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
821 return true;
822 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
823 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
824 return true;
825 case OP_MOVSWD:
826 case OP_STOSWD:
827 if ( pDis->x86.fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
828 && pCtx->rcx >= 0x40
829 )
830 {
831 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
832
833 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
834 return true;
835 }
836 break;
837
838 default:
839 /*
840 * Anything having ESP on the left side means stack writes.
841 */
842 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
843 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
844 && (pDis->Param1.x86.Base.idxGenReg == DISGREG_ESP))
845 {
846 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
847 return true;
848 }
849 break;
850 }
851
852 /*
853 * Page table updates are very very unlikely to be crossing page boundaries,
854 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
855 */
856 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
857 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
858 {
859 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
860 return true;
861 }
862
863 /*
864 * Nobody does an unaligned 8 byte write to a page table, right?
865 */
866 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
867 {
868 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
869 return true;
870 }
871
872 return false;
873}
874
875
876/**
877 * Flushes the page being accessed.
878 *
879 * @returns VBox status code suitable for scheduling.
880 * @param pVM The cross context VM structure.
881 * @param pVCpu The cross context virtual CPU structure.
882 * @param pPool The pool.
883 * @param pPage The pool page (head).
884 * @param pDis The disassembly of the write instruction.
885 * @param pCtx Pointer to the register context for the CPU.
886 * @param GCPhysFault The fault address as guest physical address.
887 * @todo VBOXSTRICTRC
888 */
889static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
890 PCPUMCTX pCtx, RTGCPHYS GCPhysFault)
891{
892 NOREF(pVM); NOREF(GCPhysFault);
893
894 /*
895 * First, do the flushing.
896 */
897 pgmPoolMonitorChainFlush(pPool, pPage);
898
899 /*
900 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
901 * Must do this in raw mode (!); XP boot will fail otherwise.
902 */
903 int rc = VINF_SUCCESS;
904 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pCtx->rip);
905 if (rc2 == VINF_SUCCESS)
906 { /* do nothing */ }
907 else if (rc2 == VINF_EM_RESCHEDULE)
908 {
909 rc = VBOXSTRICTRC_VAL(rc2);
910# ifndef IN_RING3
911 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
912# endif
913 }
914 else if (rc2 == VERR_EM_INTERPRETER)
915 {
916 rc = VINF_EM_RAW_EMULATE_INSTR;
917 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
918 }
919 else if (RT_FAILURE_NP(rc2))
920 rc = VBOXSTRICTRC_VAL(rc2);
921 else
922 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
923
924 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
925 return rc;
926}
927
928
929/**
930 * Handles the STOSD write accesses.
931 *
932 * @returns VBox status code suitable for scheduling.
933 * @param pVM The cross context VM structure.
934 * @param pPool The pool.
935 * @param pPage The pool page (head).
936 * @param pDis The disassembly of the write instruction.
937 * @param pCtx Pointer to the register context for the CPU.
938 * @param GCPhysFault The fault address as guest physical address.
939 * @param pvFault The fault address.
940 */
941DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
942 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
943{
944 unsigned uIncrement = pDis->Param1.x86.cb;
945 NOREF(pVM);
946
947 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
948 Assert(pCtx->rcx <= 0x20);
949
950# ifdef VBOX_STRICT
951 if (pDis->x86.uOpMode == DISCPUMODE_32BIT)
952 Assert(uIncrement == 4);
953 else
954 Assert(uIncrement == 8);
955# endif
956
957 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
958
959 /*
960 * Increment the modification counter and insert it into the list
961 * of modified pages the first time.
962 */
963 if (!pPage->cModifications++)
964 pgmPoolMonitorModifiedInsert(pPool, pPage);
965
966 /*
967 * Execute REP STOSD.
968 *
969 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
970 * write situation, meaning that it's safe to write here.
971 */
972 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
973 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
974 while (pCtx->rcx)
975 {
976 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
977 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pCtx->rax, uIncrement);
978 pu32 += uIncrement;
979 GCPhysFault += uIncrement;
980 pCtx->rdi += uIncrement;
981 pCtx->rcx--;
982 }
983 pCtx->rip += pDis->cbInstr;
984
985 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
986 return VINF_SUCCESS;
987}
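/*
 * Note: the loop above hand-emulates REP STOS[D|Q]: each iteration first lets
 * pgmPoolMonitorChainChanging() clear the affected shadow entries, then writes the RAX value
 * to guest physical memory and advances RDI/RCX, and finally RIP is stepped past the
 * instruction, avoiding a round trip through the generic instruction interpreter for these
 * frequent memset-style page table initializations.
 */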
988
989
990/**
991 * Handles the simple write accesses.
992 *
993 * @returns VBox status code suitable for scheduling.
994 * @param pVM The cross context VM structure.
995 * @param pVCpu The cross context virtual CPU structure.
996 * @param pPool The pool.
997 * @param pPage The pool page (head).
998 * @param pDis The disassembly of the write instruction.
999 * @param pCtx Pointer to the register context for the CPU.
1000 * @param GCPhysFault The fault address as guest physical address.
1001 * @param pfReused Reused state (in/out)
1002 */
1003DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
1004 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, bool *pfReused)
1005{
1006 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
1007 NOREF(pVM);
1008 NOREF(pfReused); /* initialized by caller */
1009
1010 /*
1011 * Increment the modification counter and insert it into the list
1012 * of modified pages the first time.
1013 */
1014 if (!pPage->cModifications++)
1015 pgmPoolMonitorModifiedInsert(pPool, pPage);
1016
1017 /*
1018 * Clear all the pages.
1019 */
1020 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
1021 if (cbWrite <= 8)
1022 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1023 else if (cbWrite <= 16)
1024 {
1025 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1026 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1027 }
1028 else
1029 {
1030 Assert(cbWrite <= 32);
1031 for (uint32_t off = 0; off < cbWrite; off += 8)
1032 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1033 }
1034
1035 /*
1036 * Interpret the instruction.
1037 */
1038 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pCtx->rip);
1039 if (RT_SUCCESS(rc))
1040 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1041 else if (rc == VERR_EM_INTERPRETER)
1042 {
1043 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
1044 pCtx->cs.Sel, (RTGCPTR)pCtx->rip, pDis->pCurInstr->uOpcode));
1045 rc = VINF_EM_RAW_EMULATE_INSTR;
1046 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
1047 }
1048
1049# if 0 /* experimental code */
1050 if (rc == VINF_SUCCESS)
1051 {
1052 switch (pPage->enmKind)
1053 {
1054 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1055 {
1056 X86PTEPAE GstPte;
1057 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1058 AssertRC(rc);
1059
1060 /* Check the new value written by the guest. If present and with a bogus physical address, then
1061 * it's fairly safe to assume the guest is reusing the PT.
1062 */
1063 if (GstPte.n.u1Present)
1064 {
1065 RTHCPHYS HCPhys = -1;
1066 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1067 if (rc != VINF_SUCCESS)
1068 {
1069 *pfReused = true;
1070 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1071 }
1072 }
1073 break;
1074 }
1075 }
1076 }
1077# endif
1078
1079 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1080 return VBOXSTRICTRC_VAL(rc);
1081}
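/*
 * Note: before the instruction is interpreted, the write above is reported to
 * pgmPoolMonitorChainChanging() in chunks of at most 8 bytes (the largest shadow entry
 * size), so the per-entry invalidation logic only ever sees small, bounded write sizes.
 */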
1082
1083
1084/**
1085 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1086 * \#PF access handler callback for page table pages.}
1087 *
1088 * @remarks The @a uUser argument is the index of the PGMPOOLPAGE.
1089 */
1090DECLCALLBACK(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTX pCtx,
1091 RTGCPTR pvFault, RTGCPHYS GCPhysFault, uint64_t uUser)
1092{
1093 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
1094 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1095 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1096 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1097 unsigned cMaxModifications;
1098 bool fForcedFlush = false;
1099 RT_NOREF_PV(uErrorCode);
1100
1101# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1102 AssertMsg(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT,
1103 ("pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1104# endif
1105 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1106
1107 PGM_LOCK_VOID(pVM);
1108 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1109 {
1110 /* Pool page changed while we were waiting for the lock; ignore. */
1111 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1112 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1113 PGM_UNLOCK(pVM);
1114 return VINF_SUCCESS;
1115 }
1116# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1117 if (pPage->fDirty)
1118 {
1119# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1120 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1121# endif
1122 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1123 PGM_UNLOCK(pVM);
1124 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1125 }
1126# endif
1127
1128# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1129 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1130 {
1131 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1132 void *pvGst;
1133 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1134 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1135 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1136 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1137 }
1138# endif
1139
1140# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1141 if (PGMPOOL_PAGE_IS_NESTED(pPage))
1142 {
1143 Assert(!CPUMIsGuestInVmxNonRootMode(CPUMQueryGuestCtxPtr(pVCpu)));
1144 Log7Func(("Flushing pvFault=%RGv GCPhysFault=%RGp\n", pvFault, GCPhysFault));
1145 pgmPoolMonitorChainFlush(pPool, pPage);
1146 PGM_UNLOCK(pVM);
1147 return VINF_SUCCESS;
1148 }
1149# endif
1150
1151 /*
1152 * Disassemble the faulting instruction.
1153 */
1154 PDISSTATE pDis = &pVCpu->pgm.s.Dis;
1155 int rc = EMInterpretDisasCurrent(pVCpu, pDis, NULL);
1156 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1157 {
1158 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1159 PGM_UNLOCK(pVM);
1160 return rc;
1161 }
1162
1163 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1164
1165 /*
1166 * We should ALWAYS have the list head as user parameter. This
1167 * is because we use that page to record the changes.
1168 */
1169 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1170
1171# ifdef IN_RING0
1172 /* Maximum nr of modifications depends on the page type. */
1173 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1174 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1175 cMaxModifications = 4;
1176 else
1177 cMaxModifications = 24;
1178# else
1179 cMaxModifications = 48;
1180# endif
1181
1182 /*
1183 * Incremental page table updates should weigh more than random ones.
1184 * (Only applies when started from offset 0)
1185 */
1186 pVCpu->pgm.s.cPoolAccessHandler++;
1187 if ( pPage->GCPtrLastAccessHandlerRip >= pCtx->rip - 0x40 /* observed loops in Windows 7 x64 */
1188 && pPage->GCPtrLastAccessHandlerRip < pCtx->rip + 0x40
1189 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.x86.cb)
1190 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1191 {
1192 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1193 Assert(pPage->cModifications < 32000);
1194 pPage->cModifications = pPage->cModifications * 2;
1195 pPage->GCPtrLastAccessHandlerFault = pvFault;
1196 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1197 if (pPage->cModifications >= cMaxModifications)
1198 {
1199 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1200 fForcedFlush = true;
1201 }
1202 }
1203
1204 if (pPage->cModifications >= cMaxModifications)
1205 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1206
1207 /*
1208 * Check if it's worth dealing with.
1209 */
1210 bool fReused = false;
1211 bool fNotReusedNotForking = false;
1212 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1213 || pgmPoolIsPageLocked(pPage)
1214 )
1215 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pCtx, pDis, pvFault, pPage))
1216 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1217 {
1218 /*
1219 * Simple instructions, no REP prefix.
1220 */
1221 if (!(pDis->x86.fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1222 {
1223 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pCtx, GCPhysFault, &fReused);
1224 if (fReused)
1225 goto flushPage;
1226
1227 /* A mov instruction to change the first page table entry will be remembered so we can detect
1228 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1229 */
1230 if ( rc == VINF_SUCCESS
1231 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1232 && pDis->pCurInstr->uOpcode == OP_MOV
1233 && (pvFault & PAGE_OFFSET_MASK) == 0)
1234 {
1235 pPage->GCPtrLastAccessHandlerFault = pvFault;
1236 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1237 pPage->GCPtrLastAccessHandlerRip = pCtx->rip;
1238 /* Make sure we don't kick out a page too quickly. */
1239 if (pPage->cModifications > 8)
1240 pPage->cModifications = 2;
1241 }
1242 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1243 {
1244 /* ignore the 2nd write to this page table entry. */
1245 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1246 }
1247 else
1248 {
1249 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1250 pPage->GCPtrLastAccessHandlerRip = 0;
1251 }
1252
1253 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1254 PGM_UNLOCK(pVM);
1255 return rc;
1256 }
1257
1258 /*
1259 * Windows is frequently doing small memset() operations (netio test 4k+).
1260 * We have to deal with these or we'll kill the cache and performance.
1261 */
1262 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1263 && !pCtx->eflags.Bits.u1DF
1264 && pDis->x86.uOpMode == pDis->uCpuMode
1265 && pDis->x86.uAddrMode == pDis->uCpuMode)
1266 {
1267 bool fValidStosd = false;
1268
1269 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1270 && pDis->x86.fPrefix == DISPREFIX_REP
1271 && pCtx->ecx <= 0x20
1272 && pCtx->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1273 && !((uintptr_t)pvFault & 3)
1274 && (pCtx->eax == 0 || pCtx->eax == 0x80) /* the two values observed. */
1275 )
1276 {
1277 fValidStosd = true;
1278 pCtx->rcx &= 0xffffffff; /* paranoia */
1279 }
1280 else
1281 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1282 && pDis->x86.fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1283 && pCtx->rcx <= 0x20
1284 && pCtx->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1285 && !((uintptr_t)pvFault & 7)
1286 && (pCtx->rax == 0 || pCtx->rax == 0x80) /* the two values observed. */
1287 )
1288 {
1289 fValidStosd = true;
1290 }
1291
1292 if (fValidStosd)
1293 {
1294 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pCtx, GCPhysFault, pvFault);
1295 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1296 PGM_UNLOCK(pVM);
1297 return rc;
1298 }
1299 }
1300
1301 /* REP prefix, don't bother. */
1302 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1303 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1304 pCtx->eax, pCtx->ecx, pCtx->edi, pCtx->esi, (RTGCPTR)pCtx->rip, pDis->pCurInstr->uOpcode, pDis->x86.fPrefix));
1305 fNotReusedNotForking = true;
1306 }
1307
1308# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1309 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1310 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1311 */
1312 if ( pPage->cModifications >= cMaxModifications
1313 && !fForcedFlush
1314 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1315 && ( fNotReusedNotForking
1316 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pCtx, pDis, pvFault, pPage)
1317 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1318 )
1319 )
1320 {
1321 Assert(!pgmPoolIsPageLocked(pPage));
1322 Assert(pPage->fDirty == false);
1323
1324 /* Flush any monitored duplicates as we will disable write protection. */
1325 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1326 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1327 {
1328 PPGMPOOLPAGE pPageHead = pPage;
1329
1330 /* Find the monitor head. */
1331 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1332 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1333
1334 while (pPageHead)
1335 {
1336 unsigned idxNext = pPageHead->iMonitoredNext;
1337
1338 if (pPageHead != pPage)
1339 {
1340 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1341 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1342 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1343 AssertRC(rc2);
1344 }
1345
1346 if (idxNext == NIL_PGMPOOL_IDX)
1347 break;
1348
1349 pPageHead = &pPool->aPages[idxNext];
1350 }
1351 }
1352
1353 /* The flushing above might fail for locked pages, so double check. */
1354 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1355 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1356 {
1357 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1358
1359 /* Temporarily allow write access to the page table again. */
1360 rc = PGMHandlerPhysicalPageTempOff(pVM,
1361 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK,
1362 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1363 if (rc == VINF_SUCCESS)
1364 {
1365 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1366 AssertMsg(rc == VINF_SUCCESS
1367 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1368 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1369 || rc == VERR_PAGE_NOT_PRESENT,
1370 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1371# ifdef VBOX_STRICT
1372 pPage->GCPtrDirtyFault = pvFault;
1373# endif
1374
1375 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1376 PGM_UNLOCK(pVM);
1377 return rc;
1378 }
1379 }
1380 }
1381# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1382
1383 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1384flushPage:
1385 /*
1386 * Not worth it, so flush it.
1387 *
1388 * If we considered it to be reused, don't go back to ring-3
1389 * to emulate failed instructions since we usually cannot
1390 * interpret then. This may be a bit risky, in which case
1391 * the reuse detection must be fixed.
1392 */
1393 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pCtx, GCPhysFault);
1394 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1395 && fReused)
1396 {
1397 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1398 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1399 if (PGMShwGetPage(pVCpu, pCtx->rip, NULL, NULL) == VINF_SUCCESS)
1400 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1401 }
1402 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1403 PGM_UNLOCK(pVM);
1404 return rc;
1405}
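/*
 * Note: the #PF handler above tries the cheap paths first (a plain small write, then the
 * REP STOSD special case); only when the page looks reused, the access looks like a fork,
 * or the modification counter overflows does it fall back to flushing the whole monitored
 * chain and, if necessary, emulating the faulting instruction.
 */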
1406
1407#endif /* !IN_RING3 */
1408
1409/**
1410 * @callback_method_impl{FNPGMPHYSHANDLER,
1411 * Access handler for shadowed page table pages.}
1412 *
1413 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1414 * @note The @a uUser argument is the index of the PGMPOOLPAGE.
1415 */
1416DECLCALLBACK(VBOXSTRICTRC)
1417pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1418 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, uint64_t uUser)
1419{
1420 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1421 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1422 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1423 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1424 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1425 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1426
1427 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1428
1429 PGM_LOCK_VOID(pVM);
1430
1431#ifdef VBOX_WITH_STATISTICS
1432 /*
1433 * Collect stats on the access.
1434 */
1435 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1436 if (cbBuf <= 16 && cbBuf > 0)
1437 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1438 else if (cbBuf >= 17 && cbBuf < 32)
1439 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1440 else if (cbBuf >= 32 && cbBuf < 64)
1441 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1442 else if (cbBuf >= 64)
1443 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1444
1445 uint8_t cbAlign;
1446 switch (pPage->enmKind)
1447 {
1448 default:
1449 cbAlign = 7;
1450 break;
1451 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1452 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1453 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1454 case PGMPOOLKIND_32BIT_PD:
1455 case PGMPOOLKIND_32BIT_PD_PHYS:
1456 cbAlign = 3;
1457 break;
1458 }
1459 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1460 if ((uint8_t)GCPhys & cbAlign)
1461 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1462#endif
1463
1464 /*
1465 * Make sure the pool page wasn't modified by a different CPU.
1466 */
1467 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1468 {
1469 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1470
1471 /* The max modification count before flushing depends on the context and page type. */
1472#ifdef IN_RING3
1473 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1474#else
1475 uint16_t cMaxModifications;
1476 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1477 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1478 cMaxModifications = 4;
1479 else
1480 cMaxModifications = 24;
1481#endif
1482
1483 /*
1484 * We don't have to be very sophisticated about this since there are relatively few calls here.
1485 * However, we must try our best to detect any non-CPU accesses (disk / networking).
1486 */
1487 if ( ( pPage->cModifications < cMaxModifications
1488 || pgmPoolIsPageLocked(pPage) )
1489 && enmOrigin != PGMACCESSORIGIN_DEVICE
1490 && cbBuf <= 16)
1491 {
1492 /* Clear the shadow entry. */
1493 if (!pPage->cModifications++)
1494 pgmPoolMonitorModifiedInsert(pPool, pPage);
1495
1496 if (cbBuf <= 8)
1497 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1498 else
1499 {
1500 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1501 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1502 }
1503 }
1504 else
1505 pgmPoolMonitorChainFlush(pPool, pPage);
1506
1507 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1508 }
1509 else
1510 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1511 PGM_UNLOCK(pVM);
1512 return VINF_PGM_HANDLER_DO_DEFAULT;
1513}
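/*
 * Note: unlike the #PF handler above, this handler only invalidates or flushes the affected
 * shadow entries and then returns VINF_PGM_HANDLER_DO_DEFAULT, leaving the actual guest
 * write to the default handling in the caller (see the @remarks in the function comment).
 */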
1514
1515
1516#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1517
1518# if defined(VBOX_STRICT) && !defined(IN_RING3)
1519
1520/**
1521 * Check references to guest physical memory in a PAE / PAE page table.
1522 *
1523 * @param pPool The pool.
1524 * @param pPage The page.
1525 * @param pShwPT The shadow page table (mapping of the page).
1526 * @param pGstPT The guest page table.
1527 */
1528static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1529{
1530 unsigned cErrors = 0;
1531 int LastRc = -1; /* initialized to shut up gcc */
1532 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1533 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1534 PVMCC pVM = pPool->CTX_SUFF(pVM);
1535
1536# ifdef VBOX_STRICT
1537 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1538 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1539# endif
1540 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1541 {
1542 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1543 {
1544 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1545 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1546 if ( rc != VINF_SUCCESS
1547 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1548 {
1549 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1550 LastPTE = i;
1551 LastRc = rc;
1552 LastHCPhys = HCPhys;
1553 cErrors++;
1554
1555 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1556 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1557 AssertRC(rc);
1558
1559 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1560 {
1561 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1562
1563 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1564 {
1565 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1566
1567 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1568 {
1569 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1570 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1571 {
1572 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1573 }
1574 }
1575
1576 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1577 }
1578 }
1579 }
1580 }
1581 }
1582 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1583}
1584
1585
1586/**
1587 * Check references to guest physical memory in a PAE / 32-bit page table.
1588 *
1589 * @param pPool The pool.
1590 * @param pPage The page.
1591 * @param pShwPT The shadow page table (mapping of the page).
1592 * @param pGstPT The guest page table.
1593 */
1594static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1595{
1596 unsigned cErrors = 0;
1597 int LastRc = -1; /* initialized to shut up gcc */
1598 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1599 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1600 PVMCC pVM = pPool->CTX_SUFF(pVM);
1601
1602# ifdef VBOX_STRICT
1603 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1604 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1605# endif
1606 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1607 {
1608 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1609 {
1610 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1611 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1612 if ( rc != VINF_SUCCESS
1613 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1614 {
1615 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1616 LastPTE = i;
1617 LastRc = rc;
1618 LastHCPhys = HCPhys;
1619 cErrors++;
1620
1621 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1622 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1623 AssertRC(rc);
1624
1625 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1626 {
1627 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1628
1629 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1630 {
1631 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1632
1633 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1634 {
1635 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1636 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1637 {
1638 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1639 }
1640 }
1641
1642 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1643 }
1644 }
1645 }
1646 }
1647 }
1648 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1649}
1650
1651# endif /* VBOX_STRICT && !IN_RING3 */
1652
1653/**
1654 * Clear references to guest physical memory in a PAE / PAE page table.
1655 *
1656 * @returns The number of changed PTEs.
1657 * @param pPool The pool.
1658 * @param pPage The page.
1659 * @param pShwPT The shadow page table (mapping of the page).
1660 * @param pGstPT The guest page table.
1661 * @param pOldGstPT The old cached guest page table.
1662 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1663 * @param pfFlush Flush reused page table (out)
1664 */
1665DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1666 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1667{
1668 unsigned cChanged = 0;
1669
1670# ifdef VBOX_STRICT
1671 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1672 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1673# endif
1674 *pfFlush = false;
1675
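    /* Compare the guest PT just written against the cached copy (pOldGstPT) and only
       dereference/clear shadow entries whose guest mapping actually changed. */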
1676 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1677 {
1678 /* Check the new value written by the guest. If present and with a bogus physical address, then
1679 * it's fairly safe to assume the guest is reusing the PT.
1680 */
1681 if ( fAllowRemoval
1682 && (pGstPT->a[i].u & X86_PTE_P))
1683 {
1684 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1685 {
1686 *pfFlush = true;
1687 return ++cChanged;
1688 }
1689 }
1690 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1691 {
1692 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1693 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1694 {
1695# ifdef VBOX_STRICT
1696                 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1697 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1698 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1699# endif
1700 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1701 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1702 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1703 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1704
1705 if ( uHostAttr == uGuestAttr
1706 && fHostRW <= fGuestRW)
1707 continue;
1708 }
1709 cChanged++;
1710 /* Something was changed, so flush it. */
1711 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1712 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1713 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1714 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1715 }
1716 }
1717 return cChanged;
1718}
1719
1720
1721/**
1722 * Clear references to guest physical memory in a PAE / 32-bit page table.
1723 *
1724 * @returns The number of changed PTEs.
1725 * @param pPool The pool.
1726 * @param pPage The page.
1727 * @param pShwPT The shadow page table (mapping of the page).
1728 * @param pGstPT The guest page table.
1729 * @param pOldGstPT The old cached guest page table.
1730 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1731 * @param pfFlush Flush reused page table (out)
1732 */
1733DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1734 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1735{
1736 unsigned cChanged = 0;
1737
1738# ifdef VBOX_STRICT
1739 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1740 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1741# endif
1742 *pfFlush = false;
1743
1744 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1745 {
1746 /* Check the new value written by the guest. If present and with a bogus physical address, then
1747 * it's fairly safe to assume the guest is reusing the PT. */
1748 if (fAllowRemoval)
1749 {
1750 X86PGUINT const uPte = pGstPT->a[i].u;
1751 if ( (uPte & X86_PTE_P)
1752 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1753 {
1754 *pfFlush = true;
1755 return ++cChanged;
1756 }
1757 }
1758 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1759 {
1760 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1761 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1762 {
1763# ifdef VBOX_STRICT
1764                 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1765 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1766 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1767# endif
1768 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1769 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1770 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1771 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1772
1773 if ( uHostAttr == uGuestAttr
1774 && fHostRW <= fGuestRW)
1775 continue;
1776 }
1777 cChanged++;
1778 /* Something was changed, so flush it. */
1779 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1780 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1781 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1782 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1783 }
1784 }
1785 return cChanged;
1786}
1787
1788
1789/**
1790 * Flush a dirty page
1791 *
1792 * @param pVM The cross context VM structure.
1793 * @param pPool The pool.
1794 * @param idxSlot Dirty array slot index
1795 * @param fAllowRemoval Allow a reused page table to be removed
1796 */
1797static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1798{
1799 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1800
1801 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1802 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1803 if (idxPage == NIL_PGMPOOL_IDX)
1804 return;
1805
1806 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1807 Assert(pPage->idx == idxPage);
1808 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1809
1810 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1811 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1812
1813 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1814 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1815 Assert(rc == VINF_SUCCESS);
1816 pPage->fDirty = false;
1817
1818# ifdef VBOX_STRICT
1819 uint64_t fFlags = 0;
1820 RTHCPHYS HCPhys;
1821 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1822 AssertMsg( ( rc == VINF_SUCCESS
1823 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1824 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1825 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1826 || rc == VERR_PAGE_NOT_PRESENT,
1827 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1828# endif
1829
1830 /* Flush those PTEs that have changed. */
1831 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1832 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1833 void *pvGst;
1834 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1835 bool fFlush;
1836 unsigned cChanges;
1837
1838 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1839 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1840 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1841 else
1842 {
1843 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1844 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1845 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1846 }
1847
1848 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1849 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1850 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1851 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1852
1853 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1854 Assert(pPage->cModifications);
1855 if (cChanges < 4)
1856 pPage->cModifications = 1; /* must use > 0 here */
1857 else
1858 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1859
1860 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
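    /* If the dirty set was full, the slot we just freed becomes the next free one. */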
1861 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1862 pPool->idxFreeDirtyPage = idxSlot;
1863
1864 pPool->cDirtyPages--;
1865 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1866 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1867 if (fFlush)
1868 {
1869 Assert(fAllowRemoval);
1870 Log(("Flush reused page table!\n"));
1871 pgmPoolFlushPage(pPool, pPage);
1872 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1873 }
1874 else
1875 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1876}
1877
1878
1879# ifndef IN_RING3
1880/**
1881 * Add a new dirty page
1882 *
1883 * @param pVM The cross context VM structure.
1884 * @param pPool The pool.
1885 * @param pPage The page.
1886 */
1887void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1888{
1889 PGM_LOCK_ASSERT_OWNER(pVM);
1890 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1891 Assert(!pPage->fDirty);
1892 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1893
1894 unsigned idxFree = pPool->idxFreeDirtyPage;
1895 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1896 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1897
1898 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1899 {
1900 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1901 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1902 }
1903 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1904 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1905
1906 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1907
1908 /*
1909 * Make a copy of the guest page table as we require valid GCPhys addresses
1910 * when removing references to physical pages.
1911 * (The HCPhys linear lookup is *extremely* expensive!)
1912 */
1913 void *pvGst;
1914 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1915 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1916 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
1917# ifdef VBOX_STRICT
1918 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1919 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1920 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1921 else
1922 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1923 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1924# endif
1925 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1926
1927 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1928 pPage->fDirty = true;
1929 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1930 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1931 pPool->cDirtyPages++;
1932
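    /* Advance the free-slot hint; if that slot is still occupied (and the set is not
       full), scan for any unoccupied slot to use next time. */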
1933 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1934 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1935 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1936 {
1937 unsigned i;
1938 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1939 {
1940 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1941 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1942 {
1943 pPool->idxFreeDirtyPage = idxFree;
1944 break;
1945 }
1946 }
1947 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1948 }
1949
1950 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1951
1952 /*
1953 * Clear all references to this shadow table. See @bugref{7298}.
1954 */
1955 pgmPoolTrackClearPageUsers(pPool, pPage);
1956}
1957# endif /* !IN_RING3 */
1958
1959
1960/**
1961 * Check if the specified page is dirty (not write monitored)
1962 *
1963 * @returns true if the page is dirty, false if not.
1964 * @param pVM The cross context VM structure.
1965 * @param GCPhys Guest physical address
1966 */
1967bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1968{
1969 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1970 PGM_LOCK_ASSERT_OWNER(pVM);
1971 if (!pPool->cDirtyPages)
1972 return false;
1973
1974 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1975
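    /* Linear scan is cheap here: the dirty set holds at most 8 or 16 entries. */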
1976 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1977 {
1978 unsigned idxPage = pPool->aidxDirtyPages[i];
1979 if (idxPage != NIL_PGMPOOL_IDX)
1980 {
1981 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1982 if (pPage->GCPhys == GCPhys)
1983 return true;
1984 }
1985 }
1986 return false;
1987}
1988
1989
1990/**
1991 * Reset all dirty pages by reinstating page monitoring.
1992 *
1993 * @param pVM The cross context VM structure.
1994 */
1995void pgmPoolResetDirtyPages(PVMCC pVM)
1996{
1997 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1998 PGM_LOCK_ASSERT_OWNER(pVM);
1999 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2000
2001 if (!pPool->cDirtyPages)
2002 return;
2003
2004 Log(("pgmPoolResetDirtyPages\n"));
2005 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2006 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
2007
2008 pPool->idxFreeDirtyPage = 0;
2009 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2010 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2011 {
2012 unsigned i;
2013 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2014 {
2015 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2016 {
2017 pPool->idxFreeDirtyPage = i;
2018 break;
2019 }
2020 }
2021 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2022 }
2023
2024 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
2025 return;
2026}
2027
2028
2029/**
2030 * Invalidate the PT entry for the specified page
2031 *
2032 * @param pVM The cross context VM structure.
2033 * @param GCPtrPage Guest page to invalidate
2034 */
2035void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
2036{
2037 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2038 PGM_LOCK_ASSERT_OWNER(pVM);
2039 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2040
2041 if (!pPool->cDirtyPages)
2042 return;
2043
2044 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2045 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2046 {
2047 /** @todo What was intended here??? This looks incomplete... */
2048 }
2049}
2050
2051
2052/**
2053 * Flush the dirty page table at the given guest physical address, reinstating its write monitoring.
2054 *
2055 * @param pVM The cross context VM structure.
2056 * @param GCPhysPT Physical address of the page table
2057 */
2058void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
2059{
2060 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2061 PGM_LOCK_ASSERT_OWNER(pVM);
2062 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2063 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2064
2065 if (!pPool->cDirtyPages)
2066 return;
2067
2068 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2069
2070 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2071 {
2072 unsigned idxPage = pPool->aidxDirtyPages[i];
2073 if (idxPage != NIL_PGMPOOL_IDX)
2074 {
2075 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2076 if (pPage->GCPhys == GCPhysPT)
2077 {
2078 idxDirtyPage = i;
2079 break;
2080 }
2081 }
2082 }
2083
2084 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2085 {
2086 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2087 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2088 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2089 {
2090 unsigned i;
2091 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2092 {
2093 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2094 {
2095 pPool->idxFreeDirtyPage = i;
2096 break;
2097 }
2098 }
2099 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2100 }
2101 }
2102}
2103
2104#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2105
2106/**
2107 * Inserts a page into the GCPhys hash table.
2108 *
2109 * @param pPool The pool.
2110 * @param pPage The page.
2111 */
2112DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2113{
2114 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2115 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2116 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2117 pPage->iNext = pPool->aiHash[iHash];
2118 pPool->aiHash[iHash] = pPage->idx;
2119}
2120
2121
2122/**
2123 * Removes a page from the GCPhys hash table.
2124 *
2125 * @param pPool The pool.
2126 * @param pPage The page.
2127 */
2128DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2129{
2130 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2131 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2132 if (pPool->aiHash[iHash] == pPage->idx)
2133 pPool->aiHash[iHash] = pPage->iNext;
2134 else
2135 {
2136 uint16_t iPrev = pPool->aiHash[iHash];
2137 for (;;)
2138 {
2139 const int16_t i = pPool->aPages[iPrev].iNext;
2140 if (i == pPage->idx)
2141 {
2142 pPool->aPages[iPrev].iNext = pPage->iNext;
2143 break;
2144 }
2145 if (i == NIL_PGMPOOL_IDX)
2146 {
2147 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2148 break;
2149 }
2150 iPrev = i;
2151 }
2152 }
2153 pPage->iNext = NIL_PGMPOOL_IDX;
2154}
2155
2156
2157/**
2158 * Frees up one cache page.
2159 *
2160 * @returns VBox status code.
2161 * @retval VINF_SUCCESS on success.
2162 * @param pPool The pool.
2163 * @param iUser The user index.
2164 */
2165static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2166{
2167#ifndef VBOX_VMM_TARGET_ARMV8
2168 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2169#endif
2170    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
2171 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2172
2173 /*
2174 * Select one page from the tail of the age list.
2175 */
2176 PPGMPOOLPAGE pPage;
2177 for (unsigned iLoop = 0; ; iLoop++)
2178 {
2179 uint16_t iToFree = pPool->iAgeTail;
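        /* Don't evict the caller's own user-table page (iUser); take the next older
           entry in the age list instead. */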
2180 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2181 iToFree = pPool->aPages[iToFree].iAgePrev;
2182/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2183 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2184 {
2185 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2186 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2187 {
2188 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2189 continue;
2190 iToFree = i;
2191 break;
2192 }
2193 }
2194*/
2195 Assert(iToFree != iUser);
2196 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2197 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2198 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2199 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2200 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2201 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2202
2203 pPage = &pPool->aPages[iToFree];
2204
2205 /*
2206 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2207 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2208 */
2209 if ( !pgmPoolIsPageLocked(pPage)
2210 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2211 break;
2212 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2213 pgmPoolCacheUsed(pPool, pPage);
2214 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2215 }
2216
2217 /*
2218 * Found a usable page, flush it and return.
2219 */
2220 int rc = pgmPoolFlushPage(pPool, pPage);
2221 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2222 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2223 if (rc == VINF_SUCCESS)
2224 PGM_INVL_ALL_VCPU_TLBS(pVM);
2225 return rc;
2226}
2227
2228
2229/**
2230 * Checks if a kind mismatch is really a page being reused
2231 * or if it's just a normal remapping.
2232 *
2233 * @returns true if reused and the cached page (enmKind1) should be flushed
2234 * @returns false if not reused.
2235 * @param enmKind1 The kind of the cached page.
2236 * @param enmKind2 The kind of the requested page.
2237 */
2238static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2239{
2240 switch (enmKind1)
2241 {
2242 /*
2243 * Never reuse them. There is no remapping in non-paging mode.
2244 */
2245 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2246 case PGMPOOLKIND_32BIT_PD_PHYS:
2247 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2248 case PGMPOOLKIND_PAE_PD_PHYS:
2249 case PGMPOOLKIND_PAE_PDPT_PHYS:
2250 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2251 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2252 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2253 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2254 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2255 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2256 return false;
2257
2258 /*
2259 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2260 */
2261 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2262 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2263 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2264 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2265 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2266 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2267 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2268 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2269 case PGMPOOLKIND_32BIT_PD:
2270 case PGMPOOLKIND_PAE_PDPT:
2271 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2272 switch (enmKind2)
2273 {
2274 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2275 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2276 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2277 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2278 case PGMPOOLKIND_64BIT_PML4:
2279 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2280 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2281 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2282 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2283 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2284 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2285 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2286 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2287 return true;
2288 default:
2289 return false;
2290 }
2291
2292 /*
2293 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2294 */
2295 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2296 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2297 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2298 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2299 case PGMPOOLKIND_64BIT_PML4:
2300 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2301 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2302 switch (enmKind2)
2303 {
2304 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2305 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2306 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2307 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2308 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2309 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2310 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2311 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2312 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2313 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2314 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2315 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2316 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2317 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2318 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2319 return true;
2320 default:
2321 return false;
2322 }
2323
2324#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2325 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2326 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2327 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2328 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2329 return PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2);
2330
2331 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2332 return false;
2333#endif
2334
2335 /*
2336 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2337 */
2338 case PGMPOOLKIND_ROOT_NESTED:
2339 return false;
2340
2341 default:
2342 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2343 }
2344}
2345
2346
2347/**
2348 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2349 *
2350 * @returns VBox status code.
2351 * @retval VINF_PGM_CACHED_PAGE on success.
2352 * @retval VERR_FILE_NOT_FOUND if not found.
2353 * @param pPool The pool.
2354 * @param GCPhys The GC physical address of the page we're gonna shadow.
2355 * @param enmKind The kind of mapping.
2356 * @param enmAccess Access type for the mapping (only relevant for big pages)
2357 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2358 * @param iUser The shadow page pool index of the user table. This is
2359 * NIL_PGMPOOL_IDX for root pages.
2360 * @param iUserTable The index into the user table (shadowed). Ignored if
2361 * root page
2362 * @param ppPage Where to store the pointer to the page.
2363 */
2364static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2365 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2366{
2367 /*
2368 * Look up the GCPhys in the hash.
2369 */
2370 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2371 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2372 if (i != NIL_PGMPOOL_IDX)
2373 {
2374 do
2375 {
2376 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2377 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2378 if (pPage->GCPhys == GCPhys)
2379 {
2380 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2381 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2382 && pPage->fA20Enabled == fA20Enabled)
2383 {
2384 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2385 * doesn't flush it in case there are no more free use records.
2386 */
2387 pgmPoolCacheUsed(pPool, pPage);
2388
2389 int rc = VINF_SUCCESS;
2390 if (iUser != NIL_PGMPOOL_IDX)
2391 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2392 if (RT_SUCCESS(rc))
2393 {
2394 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2395 *ppPage = pPage;
2396 if (pPage->cModifications)
2397 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2398 STAM_COUNTER_INC(&pPool->StatCacheHits);
2399 return VINF_PGM_CACHED_PAGE;
2400 }
2401 return rc;
2402 }
2403
2404 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2405 {
2406 /*
2407 * The kind is different. In some cases we should now flush the page
2408 * as it has been reused, but in most cases this is normal remapping
2409 * of PDs as PT or big pages using the GCPhys field in a slightly
2410 * different way than the other kinds.
2411 */
2412 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2413 {
2414 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2415 pgmPoolFlushPage(pPool, pPage);
2416 break;
2417 }
2418 }
2419 }
2420
2421 /* next */
2422 i = pPage->iNext;
2423 } while (i != NIL_PGMPOOL_IDX);
2424 }
2425
2426 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2427 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2428 return VERR_FILE_NOT_FOUND;
2429}
2430
2431
2432/**
2433 * Inserts a page into the cache.
2434 *
2435 * @param pPool The pool.
2436 * @param pPage The cached page.
2437 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2438 */
2439static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2440{
2441 /*
2442 * Insert into the GCPhys hash if the page is fit for that.
2443 */
2444 Assert(!pPage->fCached);
2445 if (fCanBeCached)
2446 {
2447 pPage->fCached = true;
2448 pgmPoolHashInsert(pPool, pPage);
2449 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2450 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2451 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2452 }
2453 else
2454 {
2455 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2456 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2457 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2458 }
2459
2460 /*
2461 * Insert at the head of the age list.
2462 */
2463 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2464 pPage->iAgeNext = pPool->iAgeHead;
2465 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2466 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2467 else
2468 pPool->iAgeTail = pPage->idx;
2469 pPool->iAgeHead = pPage->idx;
2470}
2471
2472
2473/**
2474 * Flushes a cached page.
2475 *
2476 * @param pPool The pool.
2477 * @param pPage The cached page.
2478 */
2479static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2480{
2481 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2482
2483 /*
2484 * Remove the page from the hash.
2485 */
2486 if (pPage->fCached)
2487 {
2488 pPage->fCached = false;
2489 pgmPoolHashRemove(pPool, pPage);
2490 }
2491 else
2492 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2493
2494 /*
2495 * Remove it from the age list.
2496 */
2497 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2498 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2499 else
2500 pPool->iAgeTail = pPage->iAgePrev;
2501 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2502 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2503 else
2504 pPool->iAgeHead = pPage->iAgeNext;
2505 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2506 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2507}
2508
2509
2510/**
2511 * Looks for pages sharing the monitor.
2512 *
2513 * @returns Pointer to the head page.
2514 * @returns NULL if not found.
2515 * @param   pPool       The pool.
2516 * @param pNewPage The page which is going to be monitored.
2517 */
2518static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2519{
2520 /*
2521 * Look up the GCPhys in the hash.
2522 */
2523 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2524 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2525 if (i == NIL_PGMPOOL_IDX)
2526 return NULL;
2527 do
2528 {
2529 PPGMPOOLPAGE pPage = &pPool->aPages[i];
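        /* GCPhys is page aligned above, so this unsigned compare matches any pool page
           whose GCPhys lies within the same guest page (some kinds keep a sub-page
           offset in their GCPhys). */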
2530 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2531 && pPage != pNewPage)
2532 {
2533 switch (pPage->enmKind)
2534 {
2535 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2536 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2537 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2538 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2539 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2540 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2541 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2542 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2543 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2544 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2545 case PGMPOOLKIND_64BIT_PML4:
2546 case PGMPOOLKIND_32BIT_PD:
2547 case PGMPOOLKIND_PAE_PDPT:
2548#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2549 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2550 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2551 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2552#endif
2553 {
2554 /* find the head */
2555 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2556 {
2557 Assert(pPage->iMonitoredPrev != pPage->idx);
2558 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2559 }
2560 return pPage;
2561 }
2562
2563 /* ignore, no monitoring. */
2564 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2565 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2566 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2567 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2568 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2569 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2570 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2571 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2572 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2573 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2574 case PGMPOOLKIND_ROOT_NESTED:
2575 case PGMPOOLKIND_PAE_PD_PHYS:
2576 case PGMPOOLKIND_PAE_PDPT_PHYS:
2577 case PGMPOOLKIND_32BIT_PD_PHYS:
2578 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2579#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2580 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2581 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2582#endif
2583 break;
2584 default:
2585 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2586 }
2587 }
2588
2589 /* next */
2590 i = pPage->iNext;
2591 } while (i != NIL_PGMPOOL_IDX);
2592 return NULL;
2593}
2594
2595
2596/**
2597 * Enables write monitoring of a guest page.
2598 *
2599 * @returns VBox status code.
2600 * @retval VINF_SUCCESS on success.
2601 * @param pPool The pool.
2602 * @param pPage The cached page.
2603 */
2604static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2605{
2606 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2607
2608 /*
2609 * Filter out the relevant kinds.
2610 */
2611 switch (pPage->enmKind)
2612 {
2613 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2614 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2615 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2616 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2617 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2618 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2619 case PGMPOOLKIND_64BIT_PML4:
2620 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2621 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2622 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2623 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2624 case PGMPOOLKIND_32BIT_PD:
2625 case PGMPOOLKIND_PAE_PDPT:
2626 break;
2627
2628 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2629 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2630 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2631 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2632 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2633 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2634 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2635 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2636 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2637 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2638 case PGMPOOLKIND_ROOT_NESTED:
2639 /* Nothing to monitor here. */
2640 return VINF_SUCCESS;
2641
2642 case PGMPOOLKIND_32BIT_PD_PHYS:
2643 case PGMPOOLKIND_PAE_PDPT_PHYS:
2644 case PGMPOOLKIND_PAE_PD_PHYS:
2645 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2646 /* Nothing to monitor here. */
2647 return VINF_SUCCESS;
2648
2649#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2650 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2651 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2652 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2653 break;
2654
2655 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2656 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2657 /* Nothing to monitor here. */
2658 return VINF_SUCCESS;
2659#endif
2660
2661 default:
2662 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2663 }
2664
2665 /*
2666 * Install handler.
2667 */
2668 int rc;
2669 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2670 if (pPageHead)
2671 {
2672 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2673 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2674
2675#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2676 if (pPageHead->fDirty)
2677 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2678#endif
2679
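        /* Another pool page already monitors this guest page: link ourselves into its
           monitoring chain. The physical access handler registered for the chain head
           already covers the whole guest page, so no new registration is needed. */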
2680 pPage->iMonitoredPrev = pPageHead->idx;
2681 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2682 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2683 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2684 pPageHead->iMonitoredNext = pPage->idx;
2685 rc = VINF_SUCCESS;
2686 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2687 Log7Func(("Adding to monitoring list GCPhysPage=%RGp\n", pPage->GCPhys));
2688 }
2689 else
2690 {
2691 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2692 Log7Func(("Started monitoring GCPhysPage=%RGp HCPhys=%RHp enmKind=%s\n", pPage->GCPhys, pPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
2693
2694 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2695 PVMCC pVM = pPool->CTX_SUFF(pVM);
2696 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2697 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2698 pPage - &pPool->aPages[0], NIL_RTR3PTR /*pszDesc*/);
2699 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2700 * the heap size should suffice. */
2701 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2702 PVMCPU pVCpu = VMMGetCpu(pVM);
2703 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2704 }
2705 pPage->fMonitored = true;
2706 return rc;
2707}
2708
2709
2710/**
2711 * Disables write monitoring of a guest page.
2712 *
2713 * @returns VBox status code.
2714 * @retval VINF_SUCCESS on success.
2715 * @param pPool The pool.
2716 * @param pPage The cached page.
2717 */
2718static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2719{
2720 /*
2721 * Filter out the relevant kinds.
2722 */
2723 switch (pPage->enmKind)
2724 {
2725 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2726 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2727 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2728 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2729 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2730 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2731 case PGMPOOLKIND_64BIT_PML4:
2732 case PGMPOOLKIND_32BIT_PD:
2733 case PGMPOOLKIND_PAE_PDPT:
2734 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2735 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2736 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2737 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2738 break;
2739
2740 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2741 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2742 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2743 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2744 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2745 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2746 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2747 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2748 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2749 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2750 case PGMPOOLKIND_ROOT_NESTED:
2751 case PGMPOOLKIND_PAE_PD_PHYS:
2752 case PGMPOOLKIND_PAE_PDPT_PHYS:
2753 case PGMPOOLKIND_32BIT_PD_PHYS:
2754 /* Nothing to monitor here. */
2755 Assert(!pPage->fMonitored);
2756 return VINF_SUCCESS;
2757
2758#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2759 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2760 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2761 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2762 break;
2763
2764 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2765 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2766 /* Nothing to monitor here. */
2767 Assert(!pPage->fMonitored);
2768 return VINF_SUCCESS;
2769#endif
2770
2771 default:
2772 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2773 }
2774 Assert(pPage->fMonitored);
2775
2776 /*
2777 * Remove the page from the monitored list or uninstall it if last.
2778 */
2779 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2780 int rc;
2781 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2782 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2783 {
2784 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2785 {
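            /* We're the chain head: promote the next page to head and re-point the
               physical handler's user argument at it instead of deregistering. */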
2786 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2787 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2788 rc = PGMHandlerPhysicalChangeUserArg(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, pPage->iMonitoredNext);
2789
2790 AssertFatalRCSuccess(rc);
2791 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2792 }
2793 else
2794 {
2795 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2796 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2797 {
2798 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2799 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2800 }
2801 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2802 rc = VINF_SUCCESS;
2803 }
2804 }
2805 else
2806 {
2807 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2808 AssertFatalRC(rc);
2809 PVMCPU pVCpu = VMMGetCpu(pVM);
2810 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2811 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2812 }
2813 pPage->fMonitored = false;
2814
2815 /*
2816 * Remove it from the list of modified pages (if in it).
2817 */
2818 pgmPoolMonitorModifiedRemove(pPool, pPage);
2819
2820 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2821 Log7Func(("Stopped monitoring %RGp\n", pPage->GCPhys));
2822
2823 return rc;
2824}
2825
2826
2827/**
2828 * Inserts the page into the list of modified pages.
2829 *
2830 * @param pPool The pool.
2831 * @param pPage The page.
2832 */
2833void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2834{
2835 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2836 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2837 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2838 && pPool->iModifiedHead != pPage->idx,
2839 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2840 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2841 pPool->iModifiedHead, pPool->cModifiedPages));
2842
2843 pPage->iModifiedNext = pPool->iModifiedHead;
2844 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2845 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2846 pPool->iModifiedHead = pPage->idx;
2847 pPool->cModifiedPages++;
2848#ifdef VBOX_WITH_STATISTICS
2849 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2850 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2851#endif
2852}
2853
2854
2855/**
2856 * Removes the page from the list of modified pages and resets the
2857 * modification counter.
2858 *
2859 * @param pPool The pool.
2860 * @param pPage The page which is believed to be in the list of modified pages.
2861 */
2862static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2863{
2864 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2865 if (pPool->iModifiedHead == pPage->idx)
2866 {
2867 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2868 pPool->iModifiedHead = pPage->iModifiedNext;
2869 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2870 {
2871 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2872 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2873 }
2874 pPool->cModifiedPages--;
2875 }
2876 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2877 {
2878 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2879 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2880 {
2881 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2882 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2883 }
2884 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2885 pPool->cModifiedPages--;
2886 }
2887 else
2888 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2889 pPage->cModifications = 0;
2890}
2891
2892
2893/**
2894 * Zaps the list of modified pages, resetting their modification counters in the process.
2895 *
2896 * @param pVM The cross context VM structure.
2897 */
2898static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2899{
2900 PGM_LOCK_VOID(pVM);
2901 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2902 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2903
2904 unsigned cPages = 0; NOREF(cPages);
2905
2906#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2907 pgmPoolResetDirtyPages(pVM);
2908#endif
2909
2910 uint16_t idx = pPool->iModifiedHead;
2911 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2912 while (idx != NIL_PGMPOOL_IDX)
2913 {
2914 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2915 idx = pPage->iModifiedNext;
2916 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2917 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2918 pPage->cModifications = 0;
2919 Assert(++cPages);
2920 }
2921 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2922 pPool->cModifiedPages = 0;
2923 PGM_UNLOCK(pVM);
2924}
2925
2926
2927/**
2928 * Handle SyncCR3 pool tasks
2929 *
2930 * @returns VBox status code.
2931 * @retval  VINF_SUCCESS on success.
2932 * @retval  VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2933 * @param pVCpu The cross context virtual CPU structure.
2934 * @remark Should only be used when monitoring is available, thus placed in
2935 * the PGMPOOL_WITH_MONITORING \#ifdef.
2936 */
2937int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2938{
2939 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2940 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2941
2942 /*
2943 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2944 * Occasionally we will have to clear all the shadow page tables because we wanted
2945 * to monitor a page which was mapped by too many shadowed page tables. This operation
2946 * is sometimes referred to as a 'lightweight flush'.
2947 */
2948# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2949 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2950 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2951# else /* !IN_RING3 */
2952 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2953 {
2954 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2955 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2956
2957 /* Make sure all other VCPUs return to ring 3. */
2958 if (pVM->cCpus > 1)
2959 {
2960 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2961 PGM_INVL_ALL_VCPU_TLBS(pVM);
2962 }
2963 return VINF_PGM_SYNC_CR3;
2964 }
2965# endif /* !IN_RING3 */
2966 else
2967 {
2968 pgmPoolMonitorModifiedClearAll(pVM);
2969
2970 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2971 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2972 {
2973 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2974 return pgmPoolSyncCR3(pVCpu);
2975 }
2976 }
2977 return VINF_SUCCESS;
2978}
2979
2980
2981/**
2982 * Frees up at least one user entry.
2983 *
2984 * @returns VBox status code.
2985 * @retval  VINF_SUCCESS on success.
2986 *
2987 * @param pPool The pool.
2988 * @param iUser The user index.
2989 */
2990static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2991{
2992 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2993 /*
2994 * Just free cached pages in a braindead fashion.
2995 */
2996 /** @todo walk the age list backwards and free the first with usage. */
2997 int rc = VINF_SUCCESS;
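    /* Keep freeing cached pages until at least one user record is returned to the free list. */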
2998 do
2999 {
3000 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
3001 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
3002 rc = rc2;
3003 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
3004 return rc;
3005}
3006
3007
3008/**
3009 * Inserts a page into the cache.
3010 *
3011 * This will create a user node for the page, insert it into the GCPhys
3012 * hash, and insert it into the age list.
3013 *
3014 * @returns VBox status code.
3015 * @retval VINF_SUCCESS if successfully added.
3016 *
3017 * @param pPool The pool.
3018 * @param pPage The cached page.
3019 * @param GCPhys The GC physical address of the page we're gonna shadow.
3020 * @param iUser The user index.
3021 * @param iUserTable The user table index.
3022 */
3023DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
3024{
3025 int rc = VINF_SUCCESS;
3026 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3027
3028 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
3029
3030 if (iUser != NIL_PGMPOOL_IDX)
3031 {
3032#ifdef VBOX_STRICT
3033 /*
3034         * Check that the entry doesn't already exist.
3035 */
3036 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3037 {
3038 uint16_t i = pPage->iUserHead;
3039 do
3040 {
3041 Assert(i < pPool->cMaxUsers);
3042 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3043 i = paUsers[i].iNext;
3044 } while (i != NIL_PGMPOOL_USER_INDEX);
3045 }
3046#endif
3047
3048 /*
3049         * Find a free user node.
3050 */
3051 uint16_t i = pPool->iUserFreeHead;
3052 if (i == NIL_PGMPOOL_USER_INDEX)
3053 {
3054 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3055 if (RT_FAILURE(rc))
3056 return rc;
3057 i = pPool->iUserFreeHead;
3058 }
3059
3060 /*
3061 * Unlink the user node from the free list,
3062 * initialize and insert it into the user list.
3063 */
3064 pPool->iUserFreeHead = paUsers[i].iNext;
3065 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
3066 paUsers[i].iUser = iUser;
3067 paUsers[i].iUserTable = iUserTable;
3068 pPage->iUserHead = i;
3069 }
3070 else
3071 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3072
3073
3074 /*
3075 * Insert into cache and enable monitoring of the guest page if enabled.
3076 *
3077 * Until we implement caching of all levels, including the CR3 one, we'll
3078     * have to make sure we don't try to monitor & cache any recursive reuse of
3079     * a monitored CR3 page. Because all Windows versions are doing this we'll
3080 * have to be able to do combined access monitoring, CR3 + PT and
3081 * PD + PT (guest PAE).
3082 *
3083 * Update:
3084     * We're now cooperating with the CR3 monitor if an uncacheable page is found.
3085 */
3086 const bool fCanBeMonitored = true;
3087 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
3088 if (fCanBeMonitored)
3089 {
3090 rc = pgmPoolMonitorInsert(pPool, pPage);
3091 AssertRC(rc);
3092 }
3093 return rc;
3094}
3095
3096
3097/**
3098 * Adds a user reference to a page.
3099 *
3100 * This will move the page to the head of the age list.
3101 *
3102 * @returns VBox status code.
3103 * @retval VINF_SUCCESS if successfully added.
3104 *
3105 * @param pPool The pool.
3106 * @param pPage The cached page.
3107 * @param iUser The user index.
3108 * @param iUserTable The user table.
3109 */
3110static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3111{
3112 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3113 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3114 Assert(iUser != NIL_PGMPOOL_IDX);
3115
3116# ifdef VBOX_STRICT
3117 /*
3118     * Check that the entry doesn't already exist. We only allow multiple
3119 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3120 */
3121 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3122 {
3123 uint16_t i = pPage->iUserHead;
3124 do
3125 {
3126 Assert(i < pPool->cMaxUsers);
3127 /** @todo this assertion looks odd... Shouldn't it be && here? */
3128 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3129 i = paUsers[i].iNext;
3130 } while (i != NIL_PGMPOOL_USER_INDEX);
3131 }
3132# endif
3133
3134 /*
3135 * Allocate a user node.
3136 */
3137 uint16_t i = pPool->iUserFreeHead;
3138 if (i == NIL_PGMPOOL_USER_INDEX)
3139 {
3140 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3141 if (RT_FAILURE(rc))
3142 return rc;
3143 i = pPool->iUserFreeHead;
3144 }
3145 pPool->iUserFreeHead = paUsers[i].iNext;
3146
3147 /*
3148 * Initialize the user node and insert it.
3149 */
3150 paUsers[i].iNext = pPage->iUserHead;
3151 paUsers[i].iUser = iUser;
3152 paUsers[i].iUserTable = iUserTable;
3153 pPage->iUserHead = i;
3154
3155# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3156 if (pPage->fDirty)
3157 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3158# endif
3159
3160 /*
3161 * Tell the cache to update its replacement stats for this page.
3162 */
3163 pgmPoolCacheUsed(pPool, pPage);
3164 return VINF_SUCCESS;
3165}
3166
3167
3168/**
3169 * Frees a user record associated with a page.
3170 *
3171 * This does not clear the entry in the user table, it simply returns the
3172 * user record to the chain of free records.
3173 *
3174 * @param pPool The pool.
3175 * @param pPage The shadow page.
3176 * @param iUser The shadow page pool index of the user table.
3177 * @param iUserTable The index into the user table (shadowed).
3178 *
3179 * @remarks Don't call this for root pages.
3180 */
3181static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3182{
3183 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3184 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3185 Assert(iUser != NIL_PGMPOOL_IDX);
3186
3187 /*
3188 * Unlink and free the specified user entry.
3189 */
3190
3191 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3192 uint16_t i = pPage->iUserHead;
3193 if ( i != NIL_PGMPOOL_USER_INDEX
3194 && paUsers[i].iUser == iUser
3195 && paUsers[i].iUserTable == iUserTable)
3196 {
3197 pPage->iUserHead = paUsers[i].iNext;
3198
3199 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3200 paUsers[i].iNext = pPool->iUserFreeHead;
3201 pPool->iUserFreeHead = i;
3202 return;
3203 }
3204
3205 /* General: Linear search. */
3206 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3207 while (i != NIL_PGMPOOL_USER_INDEX)
3208 {
3209 if ( paUsers[i].iUser == iUser
3210 && paUsers[i].iUserTable == iUserTable)
3211 {
3212 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3213 paUsers[iPrev].iNext = paUsers[i].iNext;
3214 else
3215 pPage->iUserHead = paUsers[i].iNext;
3216
3217 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3218 paUsers[i].iNext = pPool->iUserFreeHead;
3219 pPool->iUserFreeHead = i;
3220 return;
3221 }
3222 iPrev = i;
3223 i = paUsers[i].iNext;
3224 }
3225
3226 /* Fatal: didn't find it */
3227 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3228 iUser, iUserTable, pPage->GCPhys));
3229}
3230
3231
3232#if 0 /* unused */
3233/**
3234 * Gets the entry size of a shadow table.
3235 *
3236 * @param enmKind The kind of page.
3237 *
3238 * @returns The size of the entry in bytes. That is, 4 or 8.
3239 * @returns If the kind is not for a table, an assertion is raised and 0 is
3240 * returned.
3241 */
3242DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3243{
3244 switch (enmKind)
3245 {
3246 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3247 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3248 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3249 case PGMPOOLKIND_32BIT_PD:
3250 case PGMPOOLKIND_32BIT_PD_PHYS:
3251 return 4;
3252
3253 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3254 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3255 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3256 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3257 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3258 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3259 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3260 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3261 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3262 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3263 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3264 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3265 case PGMPOOLKIND_64BIT_PML4:
3266 case PGMPOOLKIND_PAE_PDPT:
3267 case PGMPOOLKIND_ROOT_NESTED:
3268 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3269 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3270 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3271 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3272 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3273 case PGMPOOLKIND_PAE_PD_PHYS:
3274 case PGMPOOLKIND_PAE_PDPT_PHYS:
3275 return 8;
3276
3277 default:
3278 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3279 }
3280}
3281#endif /* unused */
3282
3283#if 0 /* unused */
3284/**
3285 * Gets the entry size of a guest table.
3286 *
3287 * @param enmKind The kind of page.
3288 *
3289 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3290 * @returns If the kind is not for a table, an assertion is raised and 0 is
3291 * returned.
3292 */
3293DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3294{
3295 switch (enmKind)
3296 {
3297 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3298 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3299 case PGMPOOLKIND_32BIT_PD:
3300 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3301 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3302 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3303 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3304 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3305 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3306 return 4;
3307
3308 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3309 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3310 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3311 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3312 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3313 case PGMPOOLKIND_64BIT_PML4:
3314 case PGMPOOLKIND_PAE_PDPT:
3315 return 8;
3316
3317 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3318 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3319 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3320 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3321 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3322 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3323 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3324 case PGMPOOLKIND_ROOT_NESTED:
3325 case PGMPOOLKIND_PAE_PD_PHYS:
3326 case PGMPOOLKIND_PAE_PDPT_PHYS:
3327 case PGMPOOLKIND_32BIT_PD_PHYS:
3328 /** @todo can we return 0? (nobody is calling this...) */
3329 AssertFailed();
3330 return 0;
3331
3332 default:
3333 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3334 }
3335}
3336#endif /* unused */
3337
3338
3339/**
3340 * Checks one shadow page table entry for a mapping of a physical page.
3341 *
3342 * @returns true if the relevant PTE(s) were kept and only updated, false if all relevant PTEs were removed.
3343 *
3344 * @param pVM The cross context VM structure.
3345 * @param pPhysPage The guest page in question.
3346 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3347 * @param iShw The shadow page table.
3348 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3349 */
3350static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3351{
3352 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3353 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3354 bool fRet = false;
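    /* Note: the return value is interpreted by the callers as 'fKeptPTEs': true means the
       relevant PTE was kept (only its R/W bit adjusted), false means it was removed. */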
3355
3356 /*
3357 * Assert sanity.
3358 */
3359 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3360 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3361 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3362
3363 /*
3364 * Then, clear the actual mappings to the page in the shadow PT.
3365 */
3366 switch (pPage->enmKind)
3367 {
3368 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3369 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3370 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3371 {
3372 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3373 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3374 uint32_t u32AndMask = 0;
3375 uint32_t u32OrMask = 0;
3376
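            /* Decide how to rewrite the entry when we are allowed to keep it: restore R/W if no
               write handler is active, clear R/W if writes are monitored.  If both masks stay
               zero, the entry is simply zapped below. */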
3377 if (!fFlushPTEs)
3378 {
3379 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3380 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3381 {
3382 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3383 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3384 u32OrMask = X86_PTE_RW;
3385 u32AndMask = UINT32_MAX;
3386 fRet = true;
3387 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3388 break;
3389
3390 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3391 u32OrMask = 0;
3392 u32AndMask = ~X86_PTE_RW;
3393 fRet = true;
3394 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3395 break;
3396 default:
3397 /* We will end up here when called with an "ALL" access handler. */
3398 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3399 break;
3400 }
3401 }
3402 else
3403 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3404
3405 /* Update the counter if we're removing references. */
3406 if (!u32AndMask)
3407 {
3408 Assert(pPage->cPresent);
3409 Assert(pPool->cPresent);
3410 pPage->cPresent--;
3411 pPool->cPresent--;
3412 }
3413
3414 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3415 {
3416 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3417 X86PTE Pte;
3418 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3419 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3420 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3421
3422 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3423 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3424 return fRet;
3425 }
3426#ifdef LOG_ENABLED
3427 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3428 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3429 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3430 {
3431 Log(("i=%d cFound=%d\n", i, ++cFound));
3432 }
3433#endif
3434 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3435 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3436 break;
3437 }
3438
3439 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3440 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3441 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3442 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3443 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3444 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3445#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
3446 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
3447# ifdef PGM_WITH_LARGE_PAGES
3448 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
3449# endif
3450#endif
3451 {
3452 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3453 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3454 uint64_t u64OrMask = 0;
3455 uint64_t u64AndMask = 0;
3456
3457 if (!fFlushPTEs)
3458 {
3459 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3460 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3461 {
3462 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3463 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3464 u64OrMask = X86_PTE_RW;
3465 u64AndMask = UINT64_MAX;
3466 fRet = true;
3467 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3468 break;
3469
3470 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3471 u64OrMask = 0;
3472 u64AndMask = ~(uint64_t)X86_PTE_RW;
3473 fRet = true;
3474 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3475 break;
3476
3477 default:
3478 /* We will end up here when called with an "ALL" access handler. */
3479 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3480 break;
3481 }
3482 }
3483 else
3484 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3485
3486 /* Update the counter if we're removing references. */
3487 if (!u64AndMask)
3488 {
3489 Assert(pPage->cPresent);
3490 Assert(pPool->cPresent);
3491 pPage->cPresent--;
3492 pPool->cPresent--;
3493 }
3494
3495 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3496 {
3497 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3498 X86PTEPAE Pte;
3499 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3500 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3501 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3502
3503 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3504 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3505 return fRet;
3506 }
3507#ifdef LOG_ENABLED
3508 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3509 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3510 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3511 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3512 Log(("i=%d cFound=%d\n", i, ++cFound));
3513#endif
3514 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3515 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3516 break;
3517 }
3518
3519#ifdef PGM_WITH_LARGE_PAGES
3520 /* Large page case only. */
3521 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3522 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
3523 {
3524 Assert(pVM->pgm.s.fNestedPaging);
3525
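            /* Note: X86_PDE4M_P and X86_PDE4M_PS occupy the same bit positions (0 and 7) as
               EPT_E_READ and EPT_E_LEAF, so the mask below works for EPT as well as PAE PDEs. */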
3526 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3527 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3528
3529 Assert( pPage->enmKind != PGMPOOLKIND_EPT_PD_FOR_EPT_PD
3530 || (pPD->a[iPte].u & EPT_E_LEAF));
3531
3532 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3533 {
3534 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3535 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3536 pPD->a[iPte].u = 0;
3537 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3538
3539 /* Update the counter as we're removing references. */
3540 Assert(pPage->cPresent);
3541 Assert(pPool->cPresent);
3542 pPage->cPresent--;
3543 pPool->cPresent--;
3544
3545 return fRet;
3546 }
3547# ifdef LOG_ENABLED
3548 LogRel(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3549 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3550 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3551 {
3552 cFound++;
3553 LogRel(("i=%d cFound=%d\n", i, cFound));
3554 }
3555# endif
3556 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d enmKind=%d\n", pPage->iFirstPresent, pPage->cPresent, pPage->enmKind));
3557 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3558 break;
3559 }
3560
3561 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3562 case PGMPOOLKIND_PAE_PD_PHYS:
3563 {
3564 Assert(pVM->pgm.s.fNestedPaging);
3565
3566 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3567 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3568
3569 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3570 {
3571 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3572 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3573 pPD->a[iPte].u = 0;
3574 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3575
3576 /* Update the counter as we're removing references. */
3577 Assert(pPage->cPresent);
3578 Assert(pPool->cPresent);
3579 pPage->cPresent--;
3580 pPool->cPresent--;
3581 return fRet;
3582 }
3583# ifdef LOG_ENABLED
3584 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3585 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3586 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3587 Log(("i=%d cFound=%d\n", i, ++cFound));
3588# endif
3589 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3590 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3591 break;
3592 }
3593#endif /* PGM_WITH_LARGE_PAGES */
3594
3595 default:
3596 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3597 }
3598
3599 /* not reached. */
3600#ifndef _MSC_VER
3601 return fRet;
3602#endif
3603}
3604
3605
3606/**
3607 * Scans one shadow page table for mappings of a physical page.
3608 *
3609 * @param pVM The cross context VM structure.
3610 * @param pPhysPage The guest page in question.
3611 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3612 * @param iShw The shadow page table.
3613 */
3614static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3615{
3616 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3617
3618     /* We should only come here when there's only one reference to this physical page. */
3619 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3620
3621 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3622 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3623 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3624 if (!fKeptPTEs)
3625 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3626 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3627}
3628
3629
3630/**
3631 * Flushes a list of shadow page tables mapping the same physical page.
3632 *
3633 * @param pVM The cross context VM structure.
3634 * @param pPhysPage The guest page in question.
3635 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3636 * @param iPhysExt The physical cross reference extent list to flush.
3637 */
3638static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3639{
3640 PGM_LOCK_ASSERT_OWNER(pVM);
3641 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3642 bool fKeepList = false;
3643
3644 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3645 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3646
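    /* Walk the whole extent chain; each extent carries a small array of
       (shadow page index, PTE index) pairs referencing this physical page. */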
3647 const uint16_t iPhysExtStart = iPhysExt;
3648 PPGMPOOLPHYSEXT pPhysExt;
3649 do
3650 {
3651 Assert(iPhysExt < pPool->cMaxPhysExts);
3652 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3653 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3654 {
3655 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3656 {
3657 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3658 if (!fKeptPTEs)
3659 {
3660 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3661 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3662 }
3663 else
3664 fKeepList = true;
3665 }
3666 }
3667 /* next */
3668 iPhysExt = pPhysExt->iNext;
3669 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3670
3671 if (!fKeepList)
3672 {
3673 /* insert the list into the free list and clear the ram range entry. */
3674 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3675 pPool->iPhysExtFreeHead = iPhysExtStart;
3676 /* Invalidate the tracking data. */
3677 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3678 }
3679
3680 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3681}
3682
3683
3684/**
3685 * Flushes all shadow page table mappings of the given guest page.
3686 *
3687 * This is typically called when the host page backing the guest one has been
3688 * replaced or when the page protection was changed due to a guest access
3689 * caught by the monitoring.
3690 *
3691 * @returns VBox status code.
3692 * @retval VINF_SUCCESS if all references have been successfully cleared.
3693 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3694 * pool cleaning. FF and sync flags are set.
3695 *
3696 * @param pVM The cross context VM structure.
3697 * @param GCPhysPage GC physical address of the page in question
3698 * @param pPhysPage The guest page in question.
3699 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3700 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3701 *                      flushed; it is NOT touched if this isn't necessary.
3702 *                      The caller MUST initialize this to @a false.
3703 */
3704int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3705{
3706 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3707 PGM_LOCK_VOID(pVM);
3708 int rc = VINF_SUCCESS;
3709
3710#ifdef PGM_WITH_LARGE_PAGES
3711 /* Is this page part of a large page? */
3712 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3713 {
3714 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3715 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3716
3717 /* Fetch the large page base. */
3718 PPGMPAGE pLargePage;
3719 if (GCPhysBase != GCPhysPage)
3720 {
3721 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3722 AssertFatal(pLargePage);
3723 }
3724 else
3725 pLargePage = pPhysPage;
3726
3727 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3728
3729 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3730 {
3731 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3732 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3733 pVM->pgm.s.cLargePagesDisabled++;
3734
3735 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3736 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3737
3738 *pfFlushTLBs = true;
3739 PGM_UNLOCK(pVM);
3740 return rc;
3741 }
3742 }
3743#else
3744 NOREF(GCPhysPage);
3745#endif /* PGM_WITH_LARGE_PAGES */
3746
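    /* The 16-bit tracking word encodes a reference count and an index: a count of one means the
       index identifies the single shadow page table referencing the page, while
       PGMPOOL_TD_CREFS_PHYSEXT means the index refers to a physical cross reference extent list
       (or is PGMPOOL_TD_IDX_OVERFLOWED if even that overflowed). */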
3747 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3748 if (u16)
3749 {
3750 /*
3751 * The zero page is currently screwing up the tracking and we'll
3752 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3753 * is defined, zero pages won't normally be mapped. Some kind of solution
3754 * will be needed for this problem of course, but it will have to wait...
3755 */
3756# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC /* end up guruing after pgmR0PhysAllocateLargePage otherwise. */
3757 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3758 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3759# else
3760 if (PGM_PAGE_IS_BALLOONED(pPhysPage))
3761# endif
3762 rc = VINF_PGM_GCPHYS_ALIASED;
3763 else
3764 {
3765 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3766 {
3767 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3768 pgmPoolTrackFlushGCPhysPT(pVM,
3769 pPhysPage,
3770 fFlushPTEs,
3771 PGMPOOL_TD_GET_IDX(u16));
3772 }
3773 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3774 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3775 else
3776 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3777 *pfFlushTLBs = true;
3778 }
3779 }
3780
3781 if (rc == VINF_PGM_GCPHYS_ALIASED)
3782 {
3783 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3784 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3785 rc = VINF_PGM_SYNC_CR3;
3786 }
3787 PGM_UNLOCK(pVM);
3788 return rc;
3789}
3790
3791
3792/**
3793 * Scans all shadow page tables for mappings of a physical page.
3794 *
3795 * This may be slow, but it's most likely more efficient than cleaning
3796 * out the entire page pool / cache.
3797 *
3798 * @returns VBox status code.
3799 * @retval VINF_SUCCESS if all references have been successfully cleared.
3800 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3801 * a page pool cleaning.
3802 *
3803 * @param pVM The cross context VM structure.
3804 * @param pPhysPage The guest page in question.
3805 */
3806int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3807{
3808 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3809 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3810 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3811 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3812
3813 /*
3814 * There is a limit to what makes sense.
3815 */
3816 if ( pPool->cPresent > 1024
3817 && pVM->cCpus == 1)
3818 {
3819 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3820 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3821 return VINF_PGM_GCPHYS_ALIASED;
3822 }
3823
3824 /*
3825 * Iterate all the pages until we've encountered all that in use.
3826      * This is a simple but not quite optimal solution.
3827 */
3828 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3829 unsigned cLeft = pPool->cUsedPages;
3830 unsigned iPage = pPool->cCurPages;
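    /* Walk the pool pages from the top; indexes below PGMPOOL_IDX_FIRST belong to
       fictitious pages and are not scanned. */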
3831 while (--iPage >= PGMPOOL_IDX_FIRST)
3832 {
3833 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3834 if ( pPage->GCPhys != NIL_RTGCPHYS
3835 && pPage->cPresent)
3836 {
3837 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* see if it hits */
3838 switch (pPage->enmKind)
3839 {
3840 /*
3841 * We only care about shadow page tables.
3842 */
3843 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3844 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3845 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3846 {
3847 const uint32_t u32 = (uint32_t)u64;
3848 unsigned cPresent = pPage->cPresent;
3849 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3850 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3851 {
3852 const X86PGUINT uPte = pPT->a[i].u;
3853 if (uPte & X86_PTE_P)
3854 {
3855 if ((uPte & X86_PTE_PG_MASK) == u32)
3856 {
3857 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3858 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3859
3860 /* Update the counter as we're removing references. */
3861 Assert(pPage->cPresent);
3862 Assert(pPool->cPresent);
3863 pPage->cPresent--;
3864 pPool->cPresent--;
3865 }
3866 if (!--cPresent)
3867 break;
3868 }
3869 }
3870 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3871 break;
3872 }
3873
3874 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3875 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3876 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3877 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3878 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3879 {
3880 unsigned cPresent = pPage->cPresent;
3881 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3882 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3883 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3884 {
3885 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3886 {
3887 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3888 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0); /// @todo why not atomic?
3889
3890 /* Update the counter as we're removing references. */
3891 Assert(pPage->cPresent);
3892 Assert(pPool->cPresent);
3893 pPage->cPresent--;
3894 pPool->cPresent--;
3895 }
3896 if (!--cPresent)
3897 break;
3898 }
3899 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3900 break;
3901 }
3902
3903 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3904 {
3905 unsigned cPresent = pPage->cPresent;
3906 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3907 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3908 {
3909 X86PGPAEUINT const uPte = pPT->a[i].u;
3910 if (uPte & EPT_E_READ)
3911 {
3912 if ((uPte & EPT_PTE_PG_MASK) == u64)
3913 {
3914 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3915 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3916
3917 /* Update the counter as we're removing references. */
3918 Assert(pPage->cPresent);
3919 Assert(pPool->cPresent);
3920 pPage->cPresent--;
3921 pPool->cPresent--;
3922 }
3923 if (!--cPresent)
3924 break;
3925 }
3926 }
3927 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3928 break;
3929 }
3930 }
3931
3932 if (!--cLeft)
3933 break;
3934 }
3935 }
3936
3937 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3938 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3939
3940 /*
3941 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3942 */
3943 if (pPool->cPresent > 1024)
3944 {
3945 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3946 return VINF_PGM_GCPHYS_ALIASED;
3947 }
3948
3949 return VINF_SUCCESS;
3950}
3951
3952
3953/**
3954 * Clears the user entry in a user table.
3955 *
3956 * This is used to remove all references to a page when flushing it.
3957 */
3958static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3959{
3960 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3961 Assert(pUser->iUser < pPool->cCurPages);
3962 uint32_t iUserTable = pUser->iUserTable;
3963
3964 /*
3965 * Map the user page. Ignore references made by fictitious pages.
3966 */
3967 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3968 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3969 union
3970 {
3971 uint64_t *pau64;
3972 uint32_t *pau32;
3973 } u;
3974 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3975 {
3976 Assert(!pUserPage->pvPageR3);
3977 return;
3978 }
3979 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3980
3981
3982 /* Safety precaution in case we change the paging for other modes too in the future. */
3983 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3984
3985#ifdef VBOX_STRICT
3986 /*
3987 * Some sanity checks.
3988 */
3989 switch (pUserPage->enmKind)
3990 {
3991 case PGMPOOLKIND_32BIT_PD:
3992 case PGMPOOLKIND_32BIT_PD_PHYS:
3993 Assert(iUserTable < X86_PG_ENTRIES);
3994 break;
3995 case PGMPOOLKIND_PAE_PDPT:
3996 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3997 case PGMPOOLKIND_PAE_PDPT_PHYS:
3998 Assert(iUserTable < 4);
3999 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
4000 break;
4001 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4002 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4003 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4004 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4005 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4006 case PGMPOOLKIND_PAE_PD_PHYS:
4007 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4008 break;
4009 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4010 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4011 break;
4012 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4013 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4014 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
4015 break;
4016 case PGMPOOLKIND_64BIT_PML4:
4017 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
4018 /* GCPhys >> PAGE_SHIFT is the index here */
4019 break;
4020 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4021 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4022 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4023 break;
4024
4025 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4026 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4027 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4028 break;
4029
4030 case PGMPOOLKIND_ROOT_NESTED:
4031 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4032 break;
4033
4034# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4035 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4036 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
4037 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4038 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4039 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4040 Assert(iUserTable < EPT_PG_ENTRIES);
4041 break;
4042# endif
4043
4044 default:
4045 AssertMsgFailed(("enmKind=%d GCPhys=%RGp\n", pUserPage->enmKind, pPage->GCPhys));
4046 break;
4047 }
4048#endif /* VBOX_STRICT */
4049
4050 /*
4051 * Clear the entry in the user page.
4052 */
4053 switch (pUserPage->enmKind)
4054 {
4055 /* 32-bit entries */
4056 case PGMPOOLKIND_32BIT_PD:
4057 case PGMPOOLKIND_32BIT_PD_PHYS:
4058 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
4059 break;
4060
4061 /* 64-bit entries */
4062 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4063 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4064 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4065 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4066 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4067 case PGMPOOLKIND_PAE_PD_PHYS:
4068 case PGMPOOLKIND_PAE_PDPT_PHYS:
4069 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4070 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4071 case PGMPOOLKIND_64BIT_PML4:
4072 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4073 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4074 case PGMPOOLKIND_PAE_PDPT:
4075 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4076 case PGMPOOLKIND_ROOT_NESTED:
4077 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4078 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4079# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4080 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4081 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
4082 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4083 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4084 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4085#endif
4086 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
4087 break;
4088
4089 default:
4090 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
4091 }
4092 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
4093}
4094
4095
4096/**
4097 * Clears all users of a page.
4098 */
4099static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4100{
4101 /*
4102 * Free all the user records.
4103 */
4104 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
4105
4106 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4107 uint16_t i = pPage->iUserHead;
4108 while (i != NIL_PGMPOOL_USER_INDEX)
4109 {
4110         /* Clear the entry in the user table. */
4111 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
4112
4113 /* Free it. */
4114 const uint16_t iNext = paUsers[i].iNext;
4115 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4116 paUsers[i].iNext = pPool->iUserFreeHead;
4117 pPool->iUserFreeHead = i;
4118
4119 /* Next. */
4120 i = iNext;
4121 }
4122 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4123}
4124
4125
4126/**
4127 * Allocates a new physical cross reference extent.
4128 *
4129 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
4130 * @param pVM The cross context VM structure.
4131 * @param piPhysExt Where to store the phys ext index.
4132 */
4133PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
4134{
4135 PGM_LOCK_ASSERT_OWNER(pVM);
4136 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4137 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4138 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4139 {
4140 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4141 return NULL;
4142 }
4143 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4144 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4145 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4146 *piPhysExt = iPhysExt;
4147 return pPhysExt;
4148}
4149
4150
4151/**
4152 * Frees a physical cross reference extent.
4153 *
4154 * @param pVM The cross context VM structure.
4155 * @param iPhysExt The extent to free.
4156 */
4157void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
4158{
4159 PGM_LOCK_ASSERT_OWNER(pVM);
4160 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4161 Assert(iPhysExt < pPool->cMaxPhysExts);
4162 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4163 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4164 {
4165 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4166 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4167 }
4168 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4169 pPool->iPhysExtFreeHead = iPhysExt;
4170}
4171
4172
4173/**
4174 * Frees a list of physical cross reference extents.
4175 *
4176 * @param pVM The cross context VM structure.
4177 * @param   iPhysExt    The index of the first extent in the list to free.
4178 */
4179void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
4180{
4181 PGM_LOCK_ASSERT_OWNER(pVM);
4182 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4183
4184 const uint16_t iPhysExtStart = iPhysExt;
4185 PPGMPOOLPHYSEXT pPhysExt;
4186 do
4187 {
4188 Assert(iPhysExt < pPool->cMaxPhysExts);
4189 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4190 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4191 {
4192 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4193 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4194 }
4195
4196 /* next */
4197 iPhysExt = pPhysExt->iNext;
4198 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4199
4200 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4201 pPool->iPhysExtFreeHead = iPhysExtStart;
4202}
4203
4204
4205/**
4206 * Insert a reference into a list of physical cross reference extents.
4207 *
4208 * @returns The new tracking data for PGMPAGE.
4209 *
4210 * @param pVM The cross context VM structure.
4211 * @param iPhysExt The physical extent index of the list head.
4212 * @param iShwPT The shadow page table index.
4213 * @param iPte Page table entry
4214 *
4215 */
4216static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4217{
4218 PGM_LOCK_ASSERT_OWNER(pVM);
4219 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4220 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4221
4222 /*
4223 * Special common cases.
4224 */
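    /* Slot 0 of the list head is always occupied (it was filled when the list was created),
       so probe slots 1 and 2 first. */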
4225 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4226 {
4227 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4228 paPhysExts[iPhysExt].apte[1] = iPte;
4229 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4230 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4231 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4232 }
4233 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4234 {
4235 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4236 paPhysExts[iPhysExt].apte[2] = iPte;
4237 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4238 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4239 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4240 }
4241 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4242
4243 /*
4244 * General treatment.
4245 */
4246 const uint16_t iPhysExtStart = iPhysExt;
4247 unsigned cMax = 15;
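    /* Scan at most 15 extents for a free slot; if that fails (or a new extent cannot be
       allocated below), the whole list is freed and the page is marked as overflowed. */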
4248 for (;;)
4249 {
4250 Assert(iPhysExt < pPool->cMaxPhysExts);
4251 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4252 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4253 {
4254 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4255 paPhysExts[iPhysExt].apte[i] = iPte;
4256 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4257 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4258 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4259 }
4260 if (!--cMax)
4261 {
4262 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
4263 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4264 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4265 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4266 }
4267
4268 /* advance */
4269 iPhysExt = paPhysExts[iPhysExt].iNext;
4270 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4271 break;
4272 }
4273
4274 /*
4275 * Add another extent to the list.
4276 */
4277 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4278 if (!pNew)
4279 {
4280 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4281 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4282 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4283 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4284 }
4285 pNew->iNext = iPhysExtStart;
4286 pNew->aidx[0] = iShwPT;
4287 pNew->apte[0] = iPte;
4288 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4289 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4290}
4291
4292
4293/**
4294 * Adds a reference to a guest physical page where extents are in use.
4295 *
4296 * @returns The new tracking data for PGMPAGE.
4297 *
4298 * @param pVM The cross context VM structure.
4299 * @param pPhysPage Pointer to the aPages entry in the ram range.
4300 * @param u16 The ram range flags (top 16-bits).
4301 * @param iShwPT The shadow page table index.
4302 * @param iPte Page table entry
4303 */
4304uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4305{
4306 PGM_LOCK_VOID(pVM);
4307 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4308 {
4309 /*
4310 * Convert to extent list.
4311 */
4312 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4313 uint16_t iPhysExt;
4314 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4315 if (pPhysExt)
4316 {
4317 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4318 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4319 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4320 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4321 pPhysExt->aidx[1] = iShwPT;
4322 pPhysExt->apte[1] = iPte;
4323 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4324 }
4325 else
4326 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4327 }
4328 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4329 {
4330 /*
4331 * Insert into the extent list.
4332 */
4333 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4334 }
4335 else
4336 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4337 PGM_UNLOCK(pVM);
4338 return u16;
4339}
4340
4341
4342/**
4343 * Clear references to guest physical memory.
4344 *
4345 * @param pPool The pool.
4346 * @param pPage The page.
4347 * @param pPhysPage Pointer to the aPages entry in the ram range.
4348 * @param iPte Shadow PTE index
4349 */
4350void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4351{
4352 PVMCC pVM = pPool->CTX_SUFF(pVM);
4353 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4354 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4355
4356 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4357 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4358 {
4359 PGM_LOCK_VOID(pVM);
4360
4361 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4362 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4363 do
4364 {
4365 Assert(iPhysExt < pPool->cMaxPhysExts);
4366
4367 /*
4368 * Look for the shadow page and check if it's all freed.
4369 */
4370 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4371 {
4372 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4373 && paPhysExts[iPhysExt].apte[i] == iPte)
4374 {
4375 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4376 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4377
4378 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4379 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4380 {
4381 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4382 PGM_UNLOCK(pVM);
4383 return;
4384 }
4385
4386 /* we can free the node. */
4387 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4388 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4389 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4390 {
4391 /* lonely node */
4392 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4393 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4394 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4395 }
4396 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4397 {
4398 /* head */
4399 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4400 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4401 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4402 }
4403 else
4404 {
4405 /* in list */
4406 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4407 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4408 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4409 }
4410 iPhysExt = iPhysExtNext;
4411 PGM_UNLOCK(pVM);
4412 return;
4413 }
4414 }
4415
4416 /* next */
4417 iPhysExtPrev = iPhysExt;
4418 iPhysExt = paPhysExts[iPhysExt].iNext;
4419 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4420
4421 PGM_UNLOCK(pVM);
4422 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4423 }
4424 else /* nothing to do */
4425 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4426}
4427
4428/**
4429 * Clear references to guest physical memory.
4430 *
4431 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4432 * physical address is assumed to be correct, so the linear search can be
4433 * skipped and we can assert at an earlier point.
4434 *
4435 * @param pPool The pool.
4436 * @param pPage The page.
4437 * @param HCPhys The host physical address corresponding to the guest page.
4438 * @param GCPhys The guest physical address corresponding to HCPhys.
4439 * @param iPte Shadow PTE index
4440 */
4441static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4442{
4443 /*
4444      * Look up the page and check that it checks out before dereferencing it.
4445 */
4446 PVMCC pVM = pPool->CTX_SUFF(pVM);
4447 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4448 if (pPhysPage)
4449 {
4450 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4451#ifdef LOG_ENABLED
4452 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4453 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4454#endif
4455 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4456 {
4457 Assert(pPage->cPresent);
4458 Assert(pPool->cPresent);
4459 pPage->cPresent--;
4460 pPool->cPresent--;
4461 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4462 return;
4463 }
4464
4465 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp iPte=%u fIsNested=%RTbool\n",
4466 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage), iPte, PGMPOOL_PAGE_IS_NESTED(pPage)));
4467 }
4468 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4469}
4470
4471
4472/**
4473 * Clear references to guest physical memory.
4474 *
4475 * @param pPool The pool.
4476 * @param pPage The page.
4477 * @param HCPhys The host physical address corresponding to the guest page.
4478 * @param   GCPhysHint  The guest physical address which may correspond to HCPhys.
4479 * @param iPte Shadow pte index
4480 */
4481void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4482{
4483 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4484
4485 /*
4486 * Try the hint first.
4487 */
4488 RTHCPHYS HCPhysHinted;
4489 PVMCC pVM = pPool->CTX_SUFF(pVM);
4490 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4491 if (pPhysPage)
4492 {
4493 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4494 Assert(HCPhysHinted);
4495 if (HCPhysHinted == HCPhys)
4496 {
4497 Assert(pPage->cPresent);
4498 Assert(pPool->cPresent);
4499 pPage->cPresent--;
4500 pPool->cPresent--;
4501 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4502 return;
4503 }
4504 }
4505 else
4506 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4507
4508 /*
4509 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4510 */
4511 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4512 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
4513 Assert(pVM->pgm.s.apRamRanges[0] == NULL);
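    /* The RAM range lookup table is indexed from 1; entry 0 is always unused. */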
4514 for (uint32_t idx = 1; idx <= idRamRangeMax; idx++)
4515 {
4516 PPGMRAMRANGE const pRam = pVM->CTX_EXPR(pgm, pgmr0, pgm).s.apRamRanges[idx];
4517 AssertContinue(pRam);
4518 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4519 while (iPage-- > 0)
4520 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4521 {
4522 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4523 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4524 Assert(pPage->cPresent);
4525 Assert(pPool->cPresent);
4526 pPage->cPresent--;
4527 pPool->cPresent--;
4528 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4529 return;
4530 }
4531 }
4532
4533 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4534}
4535
4536
4537/**
4538 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4539 *
4540 * @param pPool The pool.
4541 * @param pPage The page.
4542 * @param pShwPT The shadow page table (mapping of the page).
4543 * @param pGstPT The guest page table.
4544 */
4545DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4546{
4547 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
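    /* When the A20 gate is disabled for this page, address bit 20 is masked out of the
       guest physical hint (A20 wrap-around). */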
4548 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4549 {
4550 const X86PGUINT uPte = pShwPT->a[i].u;
4551 Assert(!(uPte & RT_BIT_32(10)));
4552 if (uPte & X86_PTE_P)
4553 {
4554 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4555 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4556 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4557 if (!pPage->cPresent)
4558 break;
4559 }
4560 }
4561}
4562
4563
4564/**
4565 * Clear references to guest physical memory in a PAE / 32-bit page table.
4566 *
4567 * @param pPool The pool.
4568 * @param pPage The page.
4569 * @param pShwPT The shadow page table (mapping of the page).
4570 * @param pGstPT The guest page table (just a half one).
4571 */
4572DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4573{
4574 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4575 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4576 {
4577 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4578 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4579 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4580 {
4581 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4582 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4583 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4584 if (!pPage->cPresent)
4585 break;
4586 }
4587 }
4588}
4589
4590
4591/**
4592 * Clear references to guest physical memory in a PAE / PAE page table.
4593 *
4594 * @param pPool The pool.
4595 * @param pPage The page.
4596 * @param pShwPT The shadow page table (mapping of the page).
4597 * @param pGstPT The guest page table.
4598 */
4599DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4600{
4601 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4602 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4603 {
4604 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4605 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4606 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4607 {
4608             Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4609                   i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4610 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4611 if (!pPage->cPresent)
4612 break;
4613 }
4614 }
4615}
4616
4617
4618/**
4619 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4620 *
4621 * @param pPool The pool.
4622 * @param pPage The page.
4623 * @param pShwPT The shadow page table (mapping of the page).
4624 */
4625DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4626{
4627 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4628 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4629 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4630 {
4631 const X86PGUINT uPte = pShwPT->a[i].u;
4632 Assert(!(uPte & RT_BIT_32(10)));
4633 if (uPte & X86_PTE_P)
4634 {
4635 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4636 i, uPte & X86_PTE_PG_MASK, GCPhys));
4637 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4638 if (!pPage->cPresent)
4639 break;
4640 }
4641 }
4642}
4643
4644
4645/**
4646 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4647 *
4648 * @param pPool The pool.
4649 * @param pPage The page.
4650 * @param pShwPT The shadow page table (mapping of the page).
4651 */
4652DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4653{
4654 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4655 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4656 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4657 {
4658 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4659 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4660 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4661 {
4662 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4663 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4664 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4665 if (!pPage->cPresent)
4666 break;
4667 }
4668 }
4669}
4670
4671
4672/**
4673 * Clear references to shadowed pages in an EPT page table.
4674 *
4675 * @param pPool The pool.
4676 * @param pPage The page.
4677 * @param   pShwPT      The shadow page table (mapping of the
4678 *                      page).
4679 */
4680DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4681{
4682 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4683 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4684 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4685 {
4686 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4687 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
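        /* EPT entries have no dedicated present bit; an entry is treated as present here
           when its read permission bit is set. */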
4688 if (uPte & EPT_E_READ)
4689 {
4690 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4691 i, uPte & EPT_PTE_PG_MASK, pPage->GCPhys));
4692 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4693 if (!pPage->cPresent)
4694 break;
4695 }
4696 }
4697}
4698
4699#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4700
4701/**
4702 * Clears references to shadowed pages in a SLAT EPT page table.
4703 *
4704 * @param pPool The pool.
4705 * @param pPage The page.
4706 * @param pShwPT The shadow page table (mapping of the page).
4707 * @param pGstPT The guest page table.
4708 */
4709DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT, PCEPTPT pGstPT)
4710{
4711 Assert(PGMPOOL_PAGE_IS_NESTED(pPage));
4712 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4713 {
4714 X86PGPAEUINT const uShwPte = pShwPT->a[i].u;
4715 Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
4716 if (uShwPte & EPT_PRESENT_MASK)
4717 {
4718 Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, pGstPT->a[i].u));
4719 pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, pGstPT->a[i].u & EPT_PTE_PG_MASK, i);
4720 if (!pPage->cPresent)
4721 break;
4722 }
4723 }
4724}
4725
4726
4727/**
4728 * Clear references to guest physical memory in a SLAT 2MB EPT page table.
4729 *
4730 * @param pPool The pool.
4731 * @param pPage The page.
4732 * @param pShwPT The shadow page table (mapping of the page).
4733 */
4734DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT2MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4735{
4736 Assert(pPage->fA20Enabled);
4737 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4738 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4739 {
4740 X86PGPAEUINT const uShwPte = pShwPT->a[i].u;
4741 Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
4742 if (uShwPte & EPT_PRESENT_MASK)
4743 {
4744 Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, GCPhys));
4745 pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, GCPhys, i);
4746 if (!pPage->cPresent)
4747 break;
4748 }
4749 }
4750}
4751
4752
4753/**
4754 * Clear references to shadowed pages in a SLAT EPT page directory.
4755 *
4756 * @param pPool The pool.
4757 * @param pPage The page.
4758 * @param pShwPD The shadow page directory (mapping of the page).
4759 * @param pGstPD The guest page directory.
4760 */
4761DECLINLINE(void) pgmPoolTrackDerefNestedPDEpt(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD, PCEPTPD pGstPD)
4762{
4763 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4764 {
4765 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4766#ifdef PGM_WITH_LARGE_PAGES
4767 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4768#else
4769 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4770#endif
4771 if (uPde & EPT_PRESENT_MASK)
4772 {
4773#ifdef PGM_WITH_LARGE_PAGES
4774 if (uPde & EPT_E_LEAF)
4775 {
4776 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n", i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4777 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK, pGstPD->a[i].u & EPT_PDE2M_PG_MASK, i);
4778 }
4779 else
4780#endif
4781 {
4782 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4783 if (pSubPage)
4784 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4785 else
4786 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4787 }
4788 }
4789 }
4790}
4791
4792
4793/**
4794 * Clear references to shadowed pages in a SLAT EPT PML4 table.
4795 *
4796 * @param pPool The pool.
4797 * @param pPage The page.
4798 * @param pShwPml4 The shadow PML4 table.
4799 */
4800DECLINLINE(void) pgmPoolTrackDerefNestedPML4(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPML4 pShwPml4)
4801{
4802 Assert(PGMPOOL_PAGE_IS_NESTED(pPage));
4803 for (unsigned i = 0; i < RT_ELEMENTS(pShwPml4->a); i++)
4804 {
4805 X86PGPAEUINT const uPml4e = pShwPml4->a[i].u;
4806 AssertMsg((uPml4e & (EPT_PML4E_MBZ_MASK | 0xfff0000000000f00)) == 0, ("uPml4e=%RX64\n", uPml4e));
4807 if (uPml4e & EPT_PRESENT_MASK)
4808 {
4809 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & EPT_PML4E_PG_MASK);
4810 if (pSubPage)
4811 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4812 else
4813 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4814 }
4815 }
4816}
4817#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
4818
4819
4820/**
4821 * Clear references to shadowed pages in a 32-bit page directory.
4822 *
4823 * @param pPool The pool.
4824 * @param pPage The page.
4825 * @param pShwPD The shadow page directory (mapping of the page).
4826 */
4827DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4828{
4829 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4830 {
4831 X86PGUINT const uPde = pShwPD->a[i].u;
4832 if (uPde & X86_PDE_P)
4833 {
4834 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4835 if (pSubPage)
4836 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4837 else
4838 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4839 }
4840 }
4841}
4842
4843
4844/**
4845 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4846 *
4847 * @param pPool The pool.
4848 * @param pPage The page.
4849 * @param pShwPD The shadow page directory (mapping of the page).
4850 */
4851DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4852{
4853 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4854 {
4855 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4856 if (uPde & X86_PDE_P)
4857 {
4858#ifdef PGM_WITH_LARGE_PAGES
4859 if (uPde & X86_PDE_PS)
4860 {
4861 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4862 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4863 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4864 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4865 i);
4866 }
4867 else
4868#endif
4869 {
4870 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4871 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4872 if (pSubPage)
4873 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4874 else
4875 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4876 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4877 }
4878 }
4879 }
4880}
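
/*
 * Illustrative sketch only (not part of the original source): the large-page
 * branch above hands pgmPoolTracDerefGCPhys() a guest physical hint computed
 * from the pool page's GCPhys, which is the base address of the region the
 * whole PAE page directory describes.  Each PSE entry covers 2 MB, so entry i
 * maps base + i * 2 MB.  The index value below is a made-up example.
 *
 * @code
 *      RTGCPHYS const GCPhysPdBase = pPage->GCPhys;                 // base of the region shadowed by this PD
 *      unsigned const iPde         = 3;                             // hypothetical PDE index
 *      RTGCPHYS const GCPhysHint   = GCPhysPdBase + iPde * 2 * _1M; // entry 3 -> base + 6 MB
 * @endcode
 */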
4881
4882
4883/**
4884 * Clear references to shadowed pages in a PAE page directory pointer table.
4885 *
4886 * @param pPool The pool.
4887 * @param pPage The page.
4888 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4889 */
4890DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4891{
4892 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4893 {
4894 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4895 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4896 if (uPdpe & X86_PDPE_P)
4897 {
4898 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4899 if (pSubPage)
4900 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4901 else
4902 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4903 }
4904 }
4905}
4906
4907
4908/**
4909 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4910 *
4911 * @param pPool The pool.
4912 * @param pPage The page.
4913 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4914 */
4915DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4916{
4917 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4918 {
4919 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4920 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4921 if (uPdpe & X86_PDPE_P)
4922 {
4923 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4924 if (pSubPage)
4925 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4926 else
4927 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4928 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4929 }
4930 }
4931}
4932
4933
4934/**
4935 * Clear references to shadowed pages in a 64-bit level 4 page table.
4936 *
4937 * @param pPool The pool.
4938 * @param pPage The page.
4939 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4940 */
4941DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4942{
4943 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4944 {
4945 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4946 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4947 if (uPml4e & X86_PML4E_P)
4948 {
4949 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4950 if (pSubPage)
4951 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4952 else
4953 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4954 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4955 }
4956 }
4957}
4958
4959
4960/**
4961 * Clear references to shadowed pages in an EPT page directory.
4962 *
4963 * @param pPool The pool.
4964 * @param pPage The page.
4965 * @param pShwPD The shadow page directory (mapping of the page).
4966 */
4967DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4968{
4969 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4970 {
4971 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4972#ifdef PGM_WITH_LARGE_PAGES
4973 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4974#else
4975 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4976#endif
4977 if (uPde & EPT_E_READ)
4978 {
4979#ifdef PGM_WITH_LARGE_PAGES
4980 if (uPde & EPT_E_LEAF)
4981 {
4982 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4983 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4984 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4985 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4986 i);
4987 }
4988 else
4989#endif
4990 {
4991 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4992 if (pSubPage)
4993 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4994 else
4995 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4996 }
4997 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4998 }
4999 }
5000}
5001
5002
5003/**
5004 * Clear references to shadowed pages in an EPT page directory pointer table.
5005 *
5006 * @param pPool The pool.
5007 * @param pPage The page.
5008 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
5009 */
5010DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
5011{
5012 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
5013 {
5014 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
5015 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
5016 if (uPdpe & EPT_E_READ)
5017 {
5018 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
5019 if (pSubPage)
5020 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
5021 else
5022 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
5023 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
5024 }
5025 }
5026}
5027
5028
5029/**
5030 * Clears all references made by this page.
5031 *
5032 * This includes other shadow pages and GC physical addresses.
5033 *
5034 * @param pPool The pool.
5035 * @param pPage The page.
5036 */
5037static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
5038{
5039 /*
5040 * Map the shadow page and take action according to the page kind.
5041 */
5042 PVMCC pVM = pPool->CTX_SUFF(pVM);
5043 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5044 switch (pPage->enmKind)
5045 {
5046 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5047 {
5048 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5049 void *pvGst;
5050 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5051 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
5052 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5053 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5054 break;
5055 }
5056
5057 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5058 {
5059 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5060 void *pvGst;
5061 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5062 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
5063 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5064 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5065 break;
5066 }
5067
5068 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5069 {
5070 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5071 void *pvGst;
5072 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5073 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
5074 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5075 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5076 break;
5077 }
5078
5079 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
5080 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5081 {
5082 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5083 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
5084 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5085 break;
5086 }
5087
5088 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
5089 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5090 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5091 {
5092 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5093 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
5094 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5095 break;
5096 }
5097
5098 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5099 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5100 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5101 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5102 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5103 case PGMPOOLKIND_PAE_PD_PHYS:
5104 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5105 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5106 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
5107 break;
5108
5109 case PGMPOOLKIND_32BIT_PD_PHYS:
5110 case PGMPOOLKIND_32BIT_PD:
5111 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
5112 break;
5113
5114 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5115 case PGMPOOLKIND_PAE_PDPT:
5116 case PGMPOOLKIND_PAE_PDPT_PHYS:
5117 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
5118 break;
5119
5120 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5121 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5122 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
5123 break;
5124
5125 case PGMPOOLKIND_64BIT_PML4:
5126 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
5127 break;
5128
5129 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5130 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
5131 break;
5132
5133 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5134 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
5135 break;
5136
5137 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5138 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5139 break;
5140
5141#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
5142 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5143 {
5144 void *pvGst;
5145 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5146 pgmPoolTrackDerefNestedPTEPT(pPool, pPage, (PEPTPT)pvShw, (PCEPTPT)pvGst);
5147 break;
5148 }
5149
5150 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
5151 pgmPoolTrackDerefNestedPTEPT2MB(pPool, pPage, (PEPTPT)pvShw);
5152 break;
5153
5154 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5155 {
5156 void *pvGst;
5157 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5158 pgmPoolTrackDerefNestedPDEpt(pPool, pPage, (PEPTPD)pvShw, (PCEPTPD)pvGst);
5159 break;
5160 }
5161
5162 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5163 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5164 break;
5165
5166 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
5167 pgmPoolTrackDerefNestedPML4(pPool, pPage, (PEPTPML4)pvShw);
5168 break;
5169#endif
5170
5171 default:
5172 AssertFatalMsgFailed(("enmKind=%d GCPhys=%RGp\n", pPage->enmKind, pPage->GCPhys));
5173 }
5174
5175    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
5176 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5177 RT_BZERO(pvShw, PAGE_SIZE);
5178 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5179 pPage->fZeroed = true;
5180 Assert(!pPage->cPresent);
5181 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
5182}
5183
5184
5185/**
5186 * Flushes a pool page.
5187 *
5188 * This moves the page to the free list after removing all user references to it.
5189 *
5190 * @returns VBox status code.
5191 * @retval VINF_SUCCESS on success.
5192 * @param pPool The pool.
5193 * @param pPage The shadow page.
5194 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
5195 */
5196int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
5197{
5198 PVMCC pVM = pPool->CTX_SUFF(pVM);
5199 bool fFlushRequired = false;
5200
5201 int rc = VINF_SUCCESS;
5202 STAM_PROFILE_START(&pPool->StatFlushPage, f);
5203 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5204 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5205
5206 if (PGMPOOL_PAGE_IS_NESTED(pPage))
5207 Log7Func(("pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5208 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5209
5210 /*
5211 * Reject any attempts at flushing any of the special root pages (shall
5212 * not happen).
5213 */
5214 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
5215 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
5216 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
5217 VINF_SUCCESS);
5218
5219 PGM_LOCK_VOID(pVM);
5220
5221 /*
5222 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
5223 */
5224 if (pgmPoolIsPageLocked(pPage))
5225 {
5226#if !defined(VBOX_VMM_TARGET_ARMV8)
5227 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
5228 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
5229 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
5230 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
5231 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5232 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
5233 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
5234 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
5235 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
5236 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
5237 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
5238#endif
5239 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
5240 PGM_UNLOCK(pVM);
5241 return VINF_SUCCESS;
5242 }
5243
5244 /*
5245 * Mark the page as being in need of an ASMMemZeroPage().
5246 */
5247 pPage->fZeroed = false;
5248
5249#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5250 if (pPage->fDirty)
5251 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
5252#endif
5253
5254    /* If there are any users of this table, then we *must* issue a TLB flush on all VCPUs. */
5255 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
5256 fFlushRequired = true;
5257
5258 /*
5259 * Clear the page.
5260 */
5261 pgmPoolTrackClearPageUsers(pPool, pPage);
5262 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
5263 pgmPoolTrackDeref(pPool, pPage);
5264 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
5265
5266 /*
5267 * Flush it from the cache.
5268 */
5269 pgmPoolCacheFlushPage(pPool, pPage);
5270
5271 /*
5272     * Deregister the monitoring.
5273 */
5274 if (pPage->fMonitored)
5275 rc = pgmPoolMonitorFlush(pPool, pPage);
5276
5277 /*
5278 * Free the page.
5279 */
5280 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
5281 pPage->iNext = pPool->iFreeHead;
5282 pPool->iFreeHead = pPage->idx;
5283 pPage->enmKind = PGMPOOLKIND_FREE;
5284 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5285 pPage->GCPhys = NIL_RTGCPHYS;
5286 pPage->fReusedFlushPending = false;
5287
5288 pPool->cUsedPages--;
5289
5290 /* Flush the TLBs of all VCPUs if required. */
5291 if ( fFlushRequired
5292 && fFlush)
5293 {
5294 PGM_INVL_ALL_VCPU_TLBS(pVM);
5295 }
5296
5297 PGM_UNLOCK(pVM);
5298 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5299 return rc;
5300}
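
/*
 * Minimal usage sketch, assuming a hypothetical caller (names below are not
 * from this file): flushing a single shadow page and letting the pool handle
 * the TLB invalidation.  Passing false for fFlush defers the flush to the
 * caller, which must then invalidate the TLBs of all VCPUs itself (e.g. via
 * PGM_INVL_ALL_VCPU_TLBS()) once it is done with a batch of flushes.
 *
 * @code
 *      int rc = pgmPoolFlushPage(pPool, pShwPage, true);   // fFlush=true: flush VCPU TLBs if the page had users
 *      AssertRC(rc);
 * @endcode
 */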
5301
5302
5303/**
5304 * Frees a usage of a pool page.
5305 *
5306 * The caller is responsible for updating the user table so that it no longer
5307 * references the shadow page.
5308 *
5309 * @param pPool The pool.
5310 * @param pPage The shadow page.
5311 * @param iUser The shadow page pool index of the user table.
5312 * NIL_PGMPOOL_IDX for root pages.
5313 * @param iUserTable The index into the user table (shadowed). Ignored if
5314 * root page.
5315 */
5316void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5317{
5318 PVMCC pVM = pPool->CTX_SUFF(pVM);
5319
5320 STAM_PROFILE_START(&pPool->StatFree, a);
5321 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5322 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5323 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5324
5325 PGM_LOCK_VOID(pVM);
5326 if (iUser != NIL_PGMPOOL_IDX)
5327 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5328 if (!pPage->fCached)
5329 pgmPoolFlushPage(pPool, pPage);
5330 PGM_UNLOCK(pVM);
5331 STAM_PROFILE_STOP(&pPool->StatFree, a);
5332}
5333
5334
5335/**
5336 * Makes one or more free pages available.
5337 *
5338 * @returns VBox status code.
5339 * @retval VINF_SUCCESS on success.
5340 *
5341 * @param pPool The pool.
5342 * @param enmKind Page table kind
5343 * @param iUser The user of the page.
5344 */
5345static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5346{
5347 PVMCC pVM = pPool->CTX_SUFF(pVM);
5348 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5349 NOREF(enmKind);
5350
5351 /*
5352 * If the pool isn't full grown yet, expand it.
5353 */
5354 if (pPool->cCurPages < pPool->cMaxPages)
5355 {
5356 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5357#ifdef IN_RING3
5358 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
5359#else
5360 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
5361#endif
5362 if (RT_FAILURE(rc))
5363 return rc;
5364 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5365 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5366 return VINF_SUCCESS;
5367 }
5368
5369 /*
5370 * Free one cached page.
5371 */
5372 return pgmPoolCacheFreeOne(pPool, iUser);
5373}
5374
5375
5376/**
5377 * Allocates a page from the pool.
5378 *
5379 * This page may actually be a cached page and not in need of any processing
5380 * on the caller's part.
5381 *
5382 * @returns VBox status code.
5383 * @retval VINF_SUCCESS if a NEW page was allocated.
5384 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5385 *
5386 * @param pVM The cross context VM structure.
5387 * @param GCPhys The GC physical address of the page we're going to shadow.
5388 * For 4MB and 2MB PD entries, it's the first address the
5389 * shadow PT is covering.
5390 * @param enmKind The kind of mapping.
5391 * @param enmAccess Access type for the mapping (only relevant for big pages)
5392 * @param fA20Enabled Whether the A20 gate is enabled or not.
5393 * @param iUser The shadow page pool index of the user table. Root
5394 * pages should pass NIL_PGMPOOL_IDX.
5395 * @param iUserTable The index into the user table (shadowed). Ignored for
5396 * root pages (iUser == NIL_PGMPOOL_IDX).
5397 * @param fLockPage Lock the page
5398 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5399 */
5400int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5401 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5402{
5403 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5404 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5405 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5406 *ppPage = NULL;
5407 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5408 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5409 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5410
5411#if defined(VBOX_STRICT) && defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT)
5412 PVMCPUCC pVCpu = VMMGetCpu(pVM);
5413 Assert(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT || PGMPOOL_PAGE_IS_KIND_NESTED(enmKind));
5414#endif
5415
5416 PGM_LOCK_VOID(pVM);
5417
5418 if (pPool->fCacheEnabled)
5419 {
5420 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5421 if (RT_SUCCESS(rc2))
5422 {
5423 if (fLockPage)
5424 pgmPoolLockPage(pPool, *ppPage);
5425 PGM_UNLOCK(pVM);
5426 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5427 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5428 return rc2;
5429 }
5430 }
5431
5432 /*
5433 * Allocate a new one.
5434 */
5435 int rc = VINF_SUCCESS;
5436 uint16_t iNew = pPool->iFreeHead;
5437 if (iNew == NIL_PGMPOOL_IDX)
5438 {
5439 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5440 if (RT_FAILURE(rc))
5441 {
5442 PGM_UNLOCK(pVM);
5443 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5444 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5445 return rc;
5446 }
5447 iNew = pPool->iFreeHead;
5448 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5449 }
5450
5451 /* unlink the free head */
5452 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5453 pPool->iFreeHead = pPage->iNext;
5454 pPage->iNext = NIL_PGMPOOL_IDX;
5455
5456 /*
5457 * Initialize it.
5458 */
5459 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5460 pPage->enmKind = enmKind;
5461 pPage->enmAccess = enmAccess;
5462 pPage->GCPhys = GCPhys;
5463 pPage->fA20Enabled = fA20Enabled;
5464 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5465 pPage->fMonitored = false;
5466 pPage->fCached = false;
5467 pPage->fDirty = false;
5468 pPage->fReusedFlushPending = false;
5469 pPage->cModifications = 0;
5470 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5471 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5472 pPage->cPresent = 0;
5473 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5474 pPage->idxDirtyEntry = 0;
5475 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5476 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5477 pPage->cLastAccessHandler = 0;
5478 pPage->cLocked = 0;
5479# ifdef VBOX_STRICT
5480 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5481# endif
5482
5483 /*
5484 * Insert into the tracking and cache. If this fails, free the page.
5485 */
5486 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5487 if (RT_FAILURE(rc3))
5488 {
5489 pPool->cUsedPages--;
5490 pPage->enmKind = PGMPOOLKIND_FREE;
5491 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5492 pPage->GCPhys = NIL_RTGCPHYS;
5493 pPage->iNext = pPool->iFreeHead;
5494 pPool->iFreeHead = pPage->idx;
5495 PGM_UNLOCK(pVM);
5496 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5497 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5498 return rc3;
5499 }
5500
5501 /*
5502 * Commit the allocation, clear the page and return.
5503 */
5504#ifdef VBOX_WITH_STATISTICS
5505 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5506 pPool->cUsedPagesHigh = pPool->cUsedPages;
5507#endif
5508
5509 if (!pPage->fZeroed)
5510 {
5511 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5512 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5513 RT_BZERO(pv, PAGE_SIZE);
5514 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5515 }
5516
5517 *ppPage = pPage;
5518 if (fLockPage)
5519 pgmPoolLockPage(pPool, pPage);
5520 PGM_UNLOCK(pVM);
5521 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5522 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5523 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5524 return rc;
5525}
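
/*
 * Allocation sketch, assuming a hypothetical caller (GCPhysGuestPd, iUser and
 * iUserTable below are made-up values, not from this file): a shadow paging
 * worker allocating a shadow page for a guest structure and releasing the
 * usage again when the parent entry is torn down.  VINF_PGM_CACHED_PAGE only
 * means the page came from the cache and already holds valid content.
 *
 * @code
 *      bool const   fA20Enabled = true;
 *      bool const   fLockPage   = false;
 *      PPGMPOOLPAGE pShwPage    = NULL;
 *      int rc = pgmPoolAlloc(pVM, GCPhysGuestPd, PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD, PGMPOOLACCESS_DONTCARE,
 *                            fA20Enabled, iUser, iUserTable, fLockPage, &pShwPage);
 *      if (RT_SUCCESS(rc))
 *      {
 *          // rc == VINF_PGM_CACHED_PAGE: cached page, contents already valid.
 *          // ... store pShwPage->Core.Key in the parent shadow table slot iUserTable ...
 *      }
 *
 *      // Later, when the parent entry is removed again:
 *      pgmPoolFreeByPage(pPool, pShwPage, iUser, iUserTable);
 * @endcode
 */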
5526
5527
5528/**
5529 * Frees a usage of a pool page.
5530 *
5531 * @param pVM The cross context VM structure.
5532 * @param HCPhys The HC physical address of the shadow page.
5533 * @param iUser The shadow page pool index of the user table.
5534 * NIL_PGMPOOL_IDX if root page.
5535 * @param iUserTable The index into the user table (shadowed). Ignored if
5536 * root page.
5537 */
5538void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5539{
5540 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5541 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5542 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5543}
5544
5545
5546/**
5547 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5548 *
5549 * @returns Pointer to the shadow page structure.
5550 * @param pPool The pool.
5551 * @param HCPhys The HC physical address of the shadow page.
5552 */
5553PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5554{
5555 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5556
5557 /*
5558 * Look up the page.
5559 */
5560 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5561
5562 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5563 return pPage;
5564}
5565
5566
5567/**
5568 * Internal worker for finding a page for debugging purposes, no assertions.
5569 *
5570 * @returns Pointer to the shadow page structure. NULL if not found.
5571 * @param pPool The pool.
5572 * @param HCPhys The HC physical address of the shadow page.
5573 */
5574PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5575{
5576 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5577 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5578}
5579
5580
5581/**
5582 * Internal worker for PGM_HCPHYS_2_PTR.
5583 *
5584 * @returns VBox status code.
5585 * @param pVM The cross context VM structure.
5586 * @param HCPhys The HC physical address of the shadow page.
5587 * @param ppv Where to return the address.
5588 */
5589int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5590{
5591 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5592 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5593 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5594 VERR_PGM_POOL_GET_PAGE_FAILED);
5595 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5596 return VINF_SUCCESS;
5597}
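
/*
 * Lookup sketch, assuming a hypothetical caller (HCPhysShw below is a made-up
 * value): resolving a host physical address taken from a shadow structure back
 * to its pool page and to a host-context pointer.  pgmPoolGetPage() asserts
 * fatally when the page is not in the pool, while pgmPoolHCPhys2Ptr() fails
 * with VERR_PGM_POOL_GET_PAGE_FAILED instead; both mask off the low page bits
 * themselves.
 *
 * @code
 *      PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, HCPhysShw);
 *      void        *pvShw    = NULL;
 *      int rc = pgmPoolHCPhys2Ptr(pVM, HCPhysShw, &pvShw);
 *      if (RT_SUCCESS(rc))
 *      {
 *          // pvShw now points at the mapped shadow page (plus the page offset of HCPhysShw).
 *      }
 * @endcode
 */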
5598
5599#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5600
5601/**
5602 * Flushes the specified page if present.
5603 *
5604 * @param pVM The cross context VM structure.
5605 * @param GCPhys Guest physical address of the page to flush
5606 */
5607void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5608{
5609 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5610
5611 VM_ASSERT_EMT(pVM);
5612
5613 /*
5614 * Look up the GCPhys in the hash.
5615 */
5616 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5617 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5618 if (i == NIL_PGMPOOL_IDX)
5619 return;
5620
5621 do
5622 {
5623 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5624 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5625 {
5626 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* Temporary to see if it hits. Remove later. */
5627 switch (pPage->enmKind)
5628 {
5629 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5630 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5631 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5632 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5633 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5634 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5635 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5636 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5637 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5638 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5639 case PGMPOOLKIND_64BIT_PML4:
5640 case PGMPOOLKIND_32BIT_PD:
5641 case PGMPOOLKIND_PAE_PDPT:
5642 {
5643 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5644# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5645 if (pPage->fDirty)
5646 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5647 else
5648# endif
5649 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5650 Assert(!pgmPoolIsPageLocked(pPage));
5651 pgmPoolMonitorChainFlush(pPool, pPage);
5652 return;
5653 }
5654
5655 /* ignore, no monitoring. */
5656 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5657 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5658 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5659 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5660 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5661 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5662 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5663 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5664 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5665 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5666 case PGMPOOLKIND_ROOT_NESTED:
5667 case PGMPOOLKIND_PAE_PD_PHYS:
5668 case PGMPOOLKIND_PAE_PDPT_PHYS:
5669 case PGMPOOLKIND_32BIT_PD_PHYS:
5670 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5671 break;
5672
5673 default:
5674 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5675 }
5676 }
5677
5678 /* next */
5679 i = pPage->iNext;
5680 } while (i != NIL_PGMPOOL_IDX);
5681 return;
5682}
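
/*
 * Usage sketch, assuming a hypothetical ring-3 caller (GCPhysGuestPage is a
 * made-up name): when a guest page that may be shadowed as a page table
 * changes behind the pool's write monitoring, any monitored shadow copy can be
 * dropped like this; unmonitored kinds are simply ignored by the lookup above.
 *
 * @code
 *      pgmPoolFlushPageByGCPhys(pVM, GCPhysGuestPage);
 * @endcode
 */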
5683
5684
5685/**
5686 * Resets a CPU on hot plugging.
5687 *
5688 * @param pVM The cross context VM structure.
5689 * @param pVCpu The cross context virtual CPU structure.
5690 */
5691void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5692{
5693 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5694
5695 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5696 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5697 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5698}
5699
5700
5701/**
5702 * Flushes the entire cache.
5703 *
5704 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5705 * this and will execute the CR3 flush.
5706 *
5707 * @param pVM The cross context VM structure.
5708 */
5709void pgmR3PoolReset(PVM pVM)
5710{
5711 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5712
5713 PGM_LOCK_ASSERT_OWNER(pVM);
5714 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5715 LogFlow(("pgmR3PoolReset:\n"));
5716
5717 /*
5718 * If there are no pages in the pool, there is nothing to do.
5719 */
5720 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5721 {
5722 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5723 return;
5724 }
5725
5726 /*
5727 * Exit the shadow mode since we're going to clear everything,
5728 * including the root page.
5729 */
5730 VMCC_FOR_EACH_VMCPU(pVM)
5731 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5732 VMCC_FOR_EACH_VMCPU_END(pVM);
5733
5734
5735 /*
5736 * Nuke the free list and reinsert all pages into it.
5737 */
5738 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5739 {
5740 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5741
5742 if (pPage->fMonitored)
5743 pgmPoolMonitorFlush(pPool, pPage);
5744 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5745 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5746 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5747 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5748 pPage->GCPhys = NIL_RTGCPHYS;
5749 pPage->enmKind = PGMPOOLKIND_FREE;
5750 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5751 Assert(pPage->idx == i);
5752 pPage->iNext = i + 1;
5753 pPage->fA20Enabled = true;
5754 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5755 pPage->fSeenNonGlobal = false;
5756 pPage->fMonitored = false;
5757 pPage->fDirty = false;
5758 pPage->fCached = false;
5759 pPage->fReusedFlushPending = false;
5760 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5761 pPage->cPresent = 0;
5762 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5763 pPage->cModifications = 0;
5764 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5765 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5766 pPage->idxDirtyEntry = 0;
5767 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5768 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5769 pPage->cLastAccessHandler = 0;
5770 pPage->cLocked = 0;
5771# ifdef VBOX_STRICT
5772 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5773# endif
5774 }
5775 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5776 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5777 pPool->cUsedPages = 0;
5778
5779 /*
5780 * Zap and reinitialize the user records.
5781 */
5782 pPool->cPresent = 0;
5783 pPool->iUserFreeHead = 0;
5784 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5785 const unsigned cMaxUsers = pPool->cMaxUsers;
5786 for (unsigned i = 0; i < cMaxUsers; i++)
5787 {
5788 paUsers[i].iNext = i + 1;
5789 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5790 paUsers[i].iUserTable = 0xfffffffe;
5791 }
5792 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5793
5794 /*
5795 * Clear all the GCPhys links and rebuild the phys ext free list.
5796 */
5797 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
5798 Assert(pVM->pgm.s.apRamRanges[0] == NULL);
5799 for (uint32_t idx = 1; idx <= idRamRangeMax; idx++)
5800 {
5801 PPGMRAMRANGE const pRam = pVM->CTX_EXPR(pgm, pgmr0, pgm).s.apRamRanges[idx];
5802 AssertContinue(pRam);
5803 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5804 while (iPage-- > 0)
5805 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5806 }
5807
5808 pPool->iPhysExtFreeHead = 0;
5809 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5810 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5811 for (unsigned i = 0; i < cMaxPhysExts; i++)
5812 {
5813 paPhysExts[i].iNext = i + 1;
5814 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5815 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5816 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5817 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5818 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5819 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5820 }
5821 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5822
5823 /*
5824 * Just zap the modified list.
5825 */
5826 pPool->cModifiedPages = 0;
5827 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5828
5829 /*
5830 * Clear the GCPhys hash and the age list.
5831 */
5832 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5833 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5834 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5835 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5836
5837# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5838 /* Clear all dirty pages. */
5839 pPool->idxFreeDirtyPage = 0;
5840 pPool->cDirtyPages = 0;
5841 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5842 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5843# endif
5844
5845 /*
5846 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5847 */
5848 VMCC_FOR_EACH_VMCPU(pVM)
5849 {
5850 /*
5851 * Re-enter the shadowing mode and assert Sync CR3 FF.
5852 */
5853 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5854 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5855 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5856 }
5857 VMCC_FOR_EACH_VMCPU_END(pVM);
5858
5859 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5860}
5861
5862#endif /* IN_RING3 */
5863
5864#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5865/**
5866 * Stringifies a PGMPOOLKIND value.
5867 */
5868static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5869{
5870 switch ((PGMPOOLKIND)enmKind)
5871 {
5872 case PGMPOOLKIND_INVALID:
5873 return "PGMPOOLKIND_INVALID";
5874 case PGMPOOLKIND_FREE:
5875 return "PGMPOOLKIND_FREE";
5876 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5877 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5878 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5879 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5880 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5881 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5882 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5883 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5884 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5885 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5886 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5887 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5888 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5889 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5890 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5891 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5892 case PGMPOOLKIND_32BIT_PD:
5893 return "PGMPOOLKIND_32BIT_PD";
5894 case PGMPOOLKIND_32BIT_PD_PHYS:
5895 return "PGMPOOLKIND_32BIT_PD_PHYS";
5896 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5897 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5898 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5899 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5900 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5901 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5902 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5903 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5904 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5905 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5906 case PGMPOOLKIND_PAE_PD_PHYS:
5907 return "PGMPOOLKIND_PAE_PD_PHYS";
5908 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5909 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5910 case PGMPOOLKIND_PAE_PDPT:
5911 return "PGMPOOLKIND_PAE_PDPT";
5912 case PGMPOOLKIND_PAE_PDPT_PHYS:
5913 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5914 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5915 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5916 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5917 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5918 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5919 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5920 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5921 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5922 case PGMPOOLKIND_64BIT_PML4:
5923 return "PGMPOOLKIND_64BIT_PML4";
5924 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5925 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5926 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5927 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5928 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5929 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5930 case PGMPOOLKIND_ROOT_NESTED:
5931 return "PGMPOOLKIND_ROOT_NESTED";
5932 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5933 return "PGMPOOLKIND_EPT_PT_FOR_EPT_PT";
5934 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
5935 return "PGMPOOLKIND_EPT_PT_FOR_EPT_2MB";
5936 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5937 return "PGMPOOLKIND_EPT_PD_FOR_EPT_PD";
5938 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5939 return "PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT";
5940 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
5941 return "PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4";
5942 }
5943 return "Unknown kind!";
5944}
5945#endif /* LOG_ENABLED || VBOX_STRICT */
5946