VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 99949

Last change on this file since 99949 was 99812, checked in by vboxsync, 21 months ago

VMM/PGM: Nested VMX: bugref:10318 Fixes to flushing of large page page in the nested-guest case. Assertions.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 224.9 KB
Line 
1/* $Id: PGMAllPool.cpp 99812 2023-05-16 09:32:15Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PGM_POOL
33#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
34#include <VBox/vmm/pgm.h>
35#include <VBox/vmm/mm.h>
36#include <VBox/vmm/em.h>
37#include <VBox/vmm/cpum.h>
38#include "PGMInternal.h"
39#include <VBox/vmm/vmcc.h>
40#include "PGMInline.h"
41#include <VBox/vmm/hm_vmx.h>
42
43#include <VBox/log.h>
44#include <VBox/err.h>
45#include <iprt/asm.h>
46#include <iprt/string.h>
47
48
49/*********************************************************************************************************************************
50* Internal Functions *
51*********************************************************************************************************************************/
52RT_C_DECLS_BEGIN
53#if 0 /* unused */
54DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
55DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
56#endif /* unused */
57static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
58static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
59static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
60static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
61#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
62static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
63#endif
64#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
65static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
66#endif
67
68int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
69PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
70void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
71void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);
72
73RT_C_DECLS_END
74
75
76#if 0 /* unused */
77/**
78 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
79 *
80 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
81 * @param enmKind The page kind.
82 */
83DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
84{
85 switch (enmKind)
86 {
87 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
88 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
89 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
90 return true;
91 default:
92 return false;
93 }
94}
95#endif /* unused */
96
97
98/**
99 * Flushes a chain of pages sharing the same access monitor.
100 *
101 * @param pPool The pool.
102 * @param pPage A page in the chain.
103 */
104void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
105{
106 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
107
108 /*
109 * Find the list head.
110 */
111 uint16_t idx = pPage->idx;
112 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
113 {
114 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
115 {
116 idx = pPage->iMonitoredPrev;
117 Assert(idx != pPage->idx);
118 pPage = &pPool->aPages[idx];
119 }
120 }
121
122 /*
123 * Iterate the list flushing each shadow page.
124 */
125 for (;;)
126 {
127 idx = pPage->iMonitoredNext;
128 Assert(idx != pPage->idx);
129 if (pPage->idx >= PGMPOOL_IDX_FIRST)
130 {
131 int rc2 = pgmPoolFlushPage(pPool, pPage);
132 AssertRC(rc2);
133 }
134 /* next */
135 if (idx == NIL_PGMPOOL_IDX)
136 break;
137 pPage = &pPool->aPages[idx];
138 }
139}
140
141
142/**
143 * Wrapper for getting the current context pointer to the entry being modified.
144 *
145 * @returns VBox status code suitable for scheduling.
146 * @param pVM The cross context VM structure.
147 * @param pvDst Destination address
148 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
149 * on the context (e.g. \#PF in R0 & RC).
150 * @param GCPhysSrc The source guest physical address.
151 * @param cb Size of data to read
152 */
153DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
154{
155#if defined(IN_RING3)
156 NOREF(pVM); NOREF(GCPhysSrc);
157 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
158 return VINF_SUCCESS;
159#else
160 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
161 NOREF(pvSrc);
162 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
163#endif
164}
165
166
167/**
168 * Process shadow entries before they are changed by the guest.
169 *
170 * For PT entries we will clear them. For PD entries, we'll simply check
171 * for mapping conflicts and set the SyncCR3 FF if found.
172 *
173 * @param pVCpu The cross context virtual CPU structure.
174 * @param pPool The pool.
175 * @param pPage The head page.
176 * @param GCPhysFault The guest physical fault address.
177 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
178 * depending on the context (e.g. \#PF in R0 & RC).
179 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
180 */
181static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
182 void const *pvAddress, unsigned cbWrite)
183{
184 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
185 const unsigned off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
186 PVMCC pVM = pPool->CTX_SUFF(pVM);
187 NOREF(pVCpu);
188
189 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
190 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
191
192 if (PGMPOOL_PAGE_IS_NESTED(pPage))
193 Log7Func(("%RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
194
195 for (;;)
196 {
197 union
198 {
199 void *pv;
200 PX86PT pPT;
201 PPGMSHWPTPAE pPTPae;
202 PX86PD pPD;
203 PX86PDPAE pPDPae;
204 PX86PDPT pPDPT;
205 PX86PML4 pPML4;
206#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
207 PEPTPDPT pEptPdpt;
208 PEPTPD pEptPd;
209 PEPTPT pEptPt;
210#endif
211 } uShw;
212
213 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
214 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
215
216 uShw.pv = NULL;
217 switch (pPage->enmKind)
218 {
219 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
220 {
221 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
222 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
223 const unsigned iShw = off / sizeof(X86PTE);
224 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
225 X86PGUINT const uPde = uShw.pPT->a[iShw].u;
226 if (uPde & X86_PTE_P)
227 {
228 X86PTE GstPte;
229 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
230 AssertRC(rc);
231 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
232 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
233 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
234 }
235 break;
236 }
237
238 /* page/2 sized */
239 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
240 {
241 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
242 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
243 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
244 {
245 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
246 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
247 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
248 {
249 X86PTE GstPte;
250 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
251 AssertRC(rc);
252
253 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
254 pgmPoolTracDerefGCPhysHint(pPool, pPage,
255 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
256 GstPte.u & X86_PTE_PG_MASK,
257 iShw);
258 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
259 }
260 }
261 break;
262 }
263
264 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
265 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
266 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
267 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
268 {
269 unsigned iGst = off / sizeof(X86PDE);
270 unsigned iShwPdpt = iGst / 256;
271 unsigned iShw = (iGst % 256) * 2;
272 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
273
274 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
275 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
276 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
277 {
278 for (unsigned i = 0; i < 2; i++)
279 {
280 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
281 if (uPde & X86_PDE_P)
282 {
283 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
284 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
285 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
286 }
287
288 /* paranoia / a bit assumptive. */
289 if ( (off & 3)
290 && (off & 3) + cbWrite > 4)
291 {
292 const unsigned iShw2 = iShw + 2 + i;
293 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
294 {
295 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
296 if (uPde2 & X86_PDE_P)
297 {
298 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
299 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
300 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
301 }
302 }
303 }
304 }
305 }
306 break;
307 }
308
309 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
310 {
311 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
312 const unsigned iShw = off / sizeof(X86PTEPAE);
313 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
314 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
315 {
316 X86PTEPAE GstPte;
317 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
318 AssertRC(rc);
319
320 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
321 pgmPoolTracDerefGCPhysHint(pPool, pPage,
322 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
323 GstPte.u & X86_PTE_PAE_PG_MASK,
324 iShw);
325 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
326 }
327
328 /* paranoia / a bit assumptive. */
329 if ( (off & 7)
330 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
331 {
332 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
333 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
334
335 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
336 {
337 X86PTEPAE GstPte;
338 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
339 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
340 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
341 AssertRC(rc);
342 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
343 pgmPoolTracDerefGCPhysHint(pPool, pPage,
344 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
345 GstPte.u & X86_PTE_PAE_PG_MASK,
346 iShw2);
347 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
348 }
349 }
350 break;
351 }
352
353 case PGMPOOLKIND_32BIT_PD:
354 {
355 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
356 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
357
358 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
359 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
360 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
361 if (uPde & X86_PDE_P)
362 {
363 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
364 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
365 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
366 }
367
368 /* paranoia / a bit assumptive. */
369 if ( (off & 3)
370 && (off & 3) + cbWrite > sizeof(X86PTE))
371 {
372 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
373 if ( iShw2 != iShw
374 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
375 {
376 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
377 if (uPde2 & X86_PDE_P)
378 {
379 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
380 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
381 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
382 }
383 }
384 }
385#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
386 if ( uShw.pPD->a[iShw].n.u1Present
387 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
388 {
389 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
390 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
391 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
392 }
393#endif
394 break;
395 }
396
397 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
398 {
399 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
400 const unsigned iShw = off / sizeof(X86PDEPAE);
401 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
402
403 /*
404 * Causes trouble when the guest uses a PDE to refer to the whole page table level
405 * structure. (Invalidate here; faults later on when it tries to change the page
406 * table entries -> recheck; probably only applies to the RC case.)
407 */
408 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
409 if (uPde & X86_PDE_P)
410 {
411 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
412 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
413 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
414 }
415
416 /* paranoia / a bit assumptive. */
417 if ( (off & 7)
418 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
419 {
420 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
421 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
422
423 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
424 if (uPde2 & X86_PDE_P)
425 {
426 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
427 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
428 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
429 }
430 }
431 break;
432 }
433
434 case PGMPOOLKIND_PAE_PDPT:
435 {
436 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
437 /*
438 * Hopefully this doesn't happen very often:
439 * - touching unused parts of the page
440 * - messing with the bits of pd pointers without changing the physical address
441 */
442 /* PDPT roots are not page aligned; 32 byte only! */
443 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
444
445 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
446 const unsigned iShw = offPdpt / sizeof(X86PDPE);
447 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
448 {
449 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
450 if (uPdpe & X86_PDPE_P)
451 {
452 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
453 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
454 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
455 }
456
457 /* paranoia / a bit assumptive. */
458 if ( (offPdpt & 7)
459 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
460 {
461 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
462 if ( iShw2 != iShw
463 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
464 {
465 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
466 if (uPdpe2 & X86_PDPE_P)
467 {
468 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
469 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
470 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
471 }
472 }
473 }
474 }
475 break;
476 }
477
478 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
479 {
480 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
481 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
482 const unsigned iShw = off / sizeof(X86PDEPAE);
483 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
484 if (uPde & X86_PDE_P)
485 {
486 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
487 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
488 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
489 }
490
491 /* paranoia / a bit assumptive. */
492 if ( (off & 7)
493 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
494 {
495 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
496 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
497 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
498 if (uPde2 & X86_PDE_P)
499 {
500 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
501 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
502 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
503 }
504 }
505 break;
506 }
507
508 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
509 {
510 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
511 /*
512 * Hopefully this doesn't happen very often:
513 * - messing with the bits of pd pointers without changing the physical address
514 */
515 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
516 const unsigned iShw = off / sizeof(X86PDPE);
517 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
518 if (uPdpe & X86_PDPE_P)
519 {
520 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
521 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
522 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
523 }
524 /* paranoia / a bit assumptive. */
525 if ( (off & 7)
526 && (off & 7) + cbWrite > sizeof(X86PDPE))
527 {
528 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
529 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
530 if (uPdpe2 & X86_PDPE_P)
531 {
532 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
533 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
534 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
535 }
536 }
537 break;
538 }
539
540 case PGMPOOLKIND_64BIT_PML4:
541 {
542 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
543 /*
544 * Hopefully this doesn't happen very often:
545 * - messing with the bits of pd pointers without changing the physical address
546 */
547 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
548 const unsigned iShw = off / sizeof(X86PDPE);
549 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
550 if (uPml4e & X86_PML4E_P)
551 {
552 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
553 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
554 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
555 }
556 /* paranoia / a bit assumptive. */
557 if ( (off & 7)
558 && (off & 7) + cbWrite > sizeof(X86PDPE))
559 {
560 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
561 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
562 if (uPml4e2 & X86_PML4E_P)
563 {
564 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
565 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
566 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
567 }
568 }
569 break;
570 }
571
572#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
573 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
574 {
575 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
576 const unsigned iShw = off / sizeof(EPTPML4E);
577 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
578 if (uPml4e & EPT_PRESENT_MASK)
579 {
580 Log7Func(("PML4 iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPml4e, pPage->GCPhys));
581 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
582 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
583 }
584
585 /* paranoia / a bit assumptive. */
586 if ( (off & 7)
587 && (off & 7) + cbWrite > sizeof(X86PML4E))
588 {
589 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
590 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
591 if (uPml4e2 & EPT_PRESENT_MASK)
592 {
593 Log7Func(("PML4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
594 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
595 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
596 }
597 }
598 break;
599 }
600
601 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
602 {
603 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
604 const unsigned iShw = off / sizeof(EPTPDPTE);
605 X86PGPAEUINT const uPdpte = uShw.pEptPdpt->a[iShw].u;
606 if (uPdpte & EPT_PRESENT_MASK)
607 {
608 Log7Func(("EPT PDPT iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPdpte, pPage->GCPhys));
609 pgmPoolFree(pVM, uPdpte & EPT_PDPTE_PG_MASK, pPage->idx, iShw);
610 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw].u, 0);
611 }
612
613 /* paranoia / a bit assumptive. */
614 if ( (off & 7)
615 && (off & 7) + cbWrite > sizeof(EPTPDPTE))
616 {
617 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDPTE);
618 X86PGPAEUINT const uPdpte2 = uShw.pEptPdpt->a[iShw2].u;
619 if (uPdpte2 & EPT_PRESENT_MASK)
620 {
621 Log7Func(("EPT PDPT iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpte2));
622 pgmPoolFree(pVM, uPdpte2 & EPT_PDPTE_PG_MASK, pPage->idx, iShw2);
623 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw2].u, 0);
624 }
625 }
626 break;
627 }
628
629 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
630 {
631 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
632 const unsigned iShw = off / sizeof(EPTPDE);
633 X86PGPAEUINT const uPde = uShw.pEptPd->a[iShw].u;
634 if (uPde & EPT_PRESENT_MASK)
635 {
636 Assert(!(uPde & EPT_E_LEAF));
637 Log7Func(("EPT PD iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPde, pPage->GCPhys));
638 pgmPoolFree(pVM, uPde & EPT_PDE_PG_MASK, pPage->idx, iShw);
639 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw].u, 0);
640 }
641
642 /* paranoia / a bit assumptive. */
643 if ( (off & 7)
644 && (off & 7) + cbWrite > sizeof(EPTPDE))
645 {
646 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDE);
647 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPd->a));
648 X86PGPAEUINT const uPde2 = uShw.pEptPd->a[iShw2].u;
649 if (uPde2 & EPT_PRESENT_MASK)
650 {
651 Assert(!(uPde2 & EPT_E_LEAF));
652 Log7Func(("EPT PD (2): iShw2=%#x: %RX64 (%RGp) -> freeing it!\n", iShw2, uPde2, pPage->GCPhys));
653 pgmPoolFree(pVM, uPde2 & EPT_PDE_PG_MASK, pPage->idx, iShw2);
654 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw2].u, 0);
655 }
656 }
657 break;
658 }
659
660 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
661 {
662 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
663 const unsigned iShw = off / sizeof(EPTPTE);
664 X86PGPAEUINT const uPte = uShw.pEptPt->a[iShw].u;
665 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
666 if (uPte & EPT_PRESENT_MASK)
667 {
668 EPTPTE GstPte;
669 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
670 AssertRC(rc);
671
672 Log7Func(("EPT PT: iShw=%#x %RX64 (%RGp)\n", iShw, uPte, pPage->GCPhys));
673 pgmPoolTracDerefGCPhysHint(pPool, pPage,
674 uShw.pEptPt->a[iShw].u & EPT_PTE_PG_MASK,
675 GstPte.u & EPT_PTE_PG_MASK,
676 iShw);
677 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw].u, 0);
678 }
679
680 /* paranoia / a bit assumptive. */
681 if ( (off & 7)
682 && (off & 7) + cbWrite > sizeof(EPTPTE))
683 {
684 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPTE);
685 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPt->a));
686 X86PGPAEUINT const uPte2 = uShw.pEptPt->a[iShw2].u;
687 if (uPte2 & EPT_PRESENT_MASK)
688 {
689 EPTPTE GstPte;
690 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
691 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
692 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
693 AssertRC(rc);
694 Log7Func(("EPT PT (2): iShw=%#x %RX64 (%RGp)\n", iShw2, uPte2, pPage->GCPhys));
695 pgmPoolTracDerefGCPhysHint(pPool, pPage,
696 uShw.pEptPt->a[iShw2].u & EPT_PTE_PG_MASK,
697 GstPte.u & EPT_PTE_PG_MASK,
698 iShw2);
699 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw2].u, 0);
700 }
701 }
702 break;
703 }
704#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
705
706 default:
707 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
708 }
709 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
710
711 /* next */
712 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
713 return;
714 pPage = &pPool->aPages[pPage->iMonitoredNext];
715 }
716}
717
718#ifndef IN_RING3
719
720/**
721 * Checks if a access could be a fork operation in progress.
722 *
723 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
724 *
725 * @returns true if it's likely that we're forking, otherwise false.
726 * @param pPool The pool.
727 * @param pDis The disassembled instruction.
728 * @param offFault The access offset.
729 */
730DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISSTATE pDis, unsigned offFault)
731{
732 /*
733 * i386 linux is using btr to clear X86_PTE_RW.
734 * The functions involved are (2.6.16 source inspection):
735 * clear_bit
736 * ptep_set_wrprotect
737 * copy_one_pte
738 * copy_pte_range
739 * copy_pmd_range
740 * copy_pud_range
741 * copy_page_range
742 * dup_mmap
743 * dup_mm
744 * copy_mm
745 * copy_process
746 * do_fork
747 */
748 if ( pDis->pCurInstr->uOpcode == OP_BTR
749 && !(offFault & 4)
750 /** @todo Validate that the bit index is X86_PTE_RW. */
751 )
752 {
753 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
754 return true;
755 }
756 return false;
757}
758
759
760/**
761 * Determine whether the page is likely to have been reused.
762 *
763 * @returns true if we consider the page as being reused for a different purpose.
764 * @returns false if we consider it to still be a paging page.
765 * @param pVM The cross context VM structure.
766 * @param pVCpu The cross context virtual CPU structure.
767 * @param pCtx Pointer to the register context for the CPU.
768 * @param pDis The disassembly info for the faulting instruction.
769 * @param pvFault The fault address.
770 * @param pPage The pool page being accessed.
771 *
772 * @remark The REP prefix check is left to the caller because of STOSD/W.
773 */
774DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTX pCtx, PDISSTATE pDis, RTGCPTR pvFault,
775 PPGMPOOLPAGE pPage)
776{
777 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
778 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
779 if (pPage->cLocked)
780 {
781 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been resued, because it's locked!\n", pvFault, pPage));
782 return false;
783 }
784
785 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
786 if ( HMHasPendingIrq(pVM)
787 && pCtx->rsp - pvFault < 32)
788 {
789 /* Fault caused by stack writes while trying to inject an interrupt event. */
790 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pCtx->rsp));
791 return true;
792 }
793
794 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pCtx->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.arch.x86.Base.idxGenReg));
795
796 /* Non-supervisor mode write means it's used for something else. */
797 if (CPUMGetGuestCPL(pVCpu) == 3)
798 return true;
799
800 switch (pDis->pCurInstr->uOpcode)
801 {
802 /* call implies the actual push of the return address faulted */
803 case OP_CALL:
804 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
805 return true;
806 case OP_PUSH:
807 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
808 return true;
809 case OP_PUSHF:
810 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
811 return true;
812 case OP_PUSHA:
813 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
814 return true;
815 case OP_FXSAVE:
816 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
817 return true;
818 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
819 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
820 return true;
821 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
822 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
823 return true;
824 case OP_MOVSWD:
825 case OP_STOSWD:
826 if ( pDis->arch.x86.fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
827 && pCtx->rcx >= 0x40
828 )
829 {
830 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
831
832 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
833 return true;
834 }
835 break;
836
837 default:
838 /*
839 * Anything having ESP on the left side means stack writes.
840 */
841 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
842 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
843 && (pDis->Param1.arch.x86.Base.idxGenReg == DISGREG_ESP))
844 {
845 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
846 return true;
847 }
848 break;
849 }
850
851 /*
852 * Page table updates are very very unlikely to be crossing page boundraries,
853 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
854 */
855 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
856 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
857 {
858 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
859 return true;
860 }
861
862 /*
863 * Nobody does an unaligned 8 byte write to a page table, right.
864 */
865 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
866 {
867 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
868 return true;
869 }
870
871 return false;
872}
873
874
875/**
876 * Flushes the page being accessed.
877 *
878 * @returns VBox status code suitable for scheduling.
879 * @param pVM The cross context VM structure.
880 * @param pVCpu The cross context virtual CPU structure.
881 * @param pPool The pool.
882 * @param pPage The pool page (head).
883 * @param pDis The disassembly of the write instruction.
884 * @param pCtx Pointer to the register context for the CPU.
885 * @param GCPhysFault The fault address as guest physical address.
886 * @todo VBOXSTRICTRC
887 */
888static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
889 PCPUMCTX pCtx, RTGCPHYS GCPhysFault)
890{
891 NOREF(pVM); NOREF(GCPhysFault);
892
893 /*
894 * First, do the flushing.
895 */
896 pgmPoolMonitorChainFlush(pPool, pPage);
897
898 /*
899 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
900 * Must do this in raw mode (!); XP boot will fail otherwise.
901 */
902 int rc = VINF_SUCCESS;
903 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pCtx->rip);
904 if (rc2 == VINF_SUCCESS)
905 { /* do nothing */ }
906 else if (rc2 == VINF_EM_RESCHEDULE)
907 {
908 rc = VBOXSTRICTRC_VAL(rc2);
909# ifndef IN_RING3
910 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
911# endif
912 }
913 else if (rc2 == VERR_EM_INTERPRETER)
914 {
915 rc = VINF_EM_RAW_EMULATE_INSTR;
916 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
917 }
918 else if (RT_FAILURE_NP(rc2))
919 rc = VBOXSTRICTRC_VAL(rc2);
920 else
921 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
922
923 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
924 return rc;
925}
926
927
928/**
929 * Handles the STOSD write accesses.
930 *
931 * @returns VBox status code suitable for scheduling.
932 * @param pVM The cross context VM structure.
933 * @param pPool The pool.
934 * @param pPage The pool page (head).
935 * @param pDis The disassembly of the write instruction.
936 * @param pCtx Pointer to the register context for the CPU.
937 * @param GCPhysFault The fault address as guest physical address.
938 * @param pvFault The fault address.
939 */
940DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
941 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
942{
943 unsigned uIncrement = pDis->Param1.arch.x86.cb;
944 NOREF(pVM);
945
946 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
947 Assert(pCtx->rcx <= 0x20);
948
949# ifdef VBOX_STRICT
950 if (pDis->arch.x86.uOpMode == DISCPUMODE_32BIT)
951 Assert(uIncrement == 4);
952 else
953 Assert(uIncrement == 8);
954# endif
955
956 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
957
958 /*
959 * Increment the modification counter and insert it into the list
960 * of modified pages the first time.
961 */
962 if (!pPage->cModifications++)
963 pgmPoolMonitorModifiedInsert(pPool, pPage);
964
965 /*
966 * Execute REP STOSD.
967 *
968 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
969 * write situation, meaning that it's safe to write here.
970 */
971 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
972 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
973 while (pCtx->rcx)
974 {
975 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
976 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pCtx->rax, uIncrement);
977 pu32 += uIncrement;
978 GCPhysFault += uIncrement;
979 pCtx->rdi += uIncrement;
980 pCtx->rcx--;
981 }
982 pCtx->rip += pDis->cbInstr;
983
984 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
985 return VINF_SUCCESS;
986}
987
988
989/**
990 * Handles the simple write accesses.
991 *
992 * @returns VBox status code suitable for scheduling.
993 * @param pVM The cross context VM structure.
994 * @param pVCpu The cross context virtual CPU structure.
995 * @param pPool The pool.
996 * @param pPage The pool page (head).
997 * @param pDis The disassembly of the write instruction.
998 * @param pCtx Pointer to the register context for the CPU.
999 * @param GCPhysFault The fault address as guest physical address.
1000 * @param pfReused Reused state (in/out)
1001 */
1002DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
1003 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, bool *pfReused)
1004{
1005 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
1006 NOREF(pVM);
1007 NOREF(pfReused); /* initialized by caller */
1008
1009 /*
1010 * Increment the modification counter and insert it into the list
1011 * of modified pages the first time.
1012 */
1013 if (!pPage->cModifications++)
1014 pgmPoolMonitorModifiedInsert(pPool, pPage);
1015
1016 /*
1017 * Clear all the pages.
1018 */
1019 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
1020 if (cbWrite <= 8)
1021 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1022 else if (cbWrite <= 16)
1023 {
1024 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1025 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1026 }
1027 else
1028 {
1029 Assert(cbWrite <= 32);
1030 for (uint32_t off = 0; off < cbWrite; off += 8)
1031 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1032 }
1033
1034 /*
1035 * Interpret the instruction.
1036 */
1037 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pCtx->rip);
1038 if (RT_SUCCESS(rc))
1039 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1040 else if (rc == VERR_EM_INTERPRETER)
1041 {
1042 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
1043 pCtx->cs.Sel, (RTGCPTR)pCtx->rip, pDis->pCurInstr->uOpcode));
1044 rc = VINF_EM_RAW_EMULATE_INSTR;
1045 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
1046 }
1047
1048# if 0 /* experimental code */
1049 if (rc == VINF_SUCCESS)
1050 {
1051 switch (pPage->enmKind)
1052 {
1053 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1054 {
1055 X86PTEPAE GstPte;
1056 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1057 AssertRC(rc);
1058
1059 /* Check the new value written by the guest. If present and with a bogus physical address, then
1060 * it's fairly safe to assume the guest is reusing the PT.
1061 */
1062 if (GstPte.n.u1Present)
1063 {
1064 RTHCPHYS HCPhys = -1;
1065 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1066 if (rc != VINF_SUCCESS)
1067 {
1068 *pfReused = true;
1069 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1070 }
1071 }
1072 break;
1073 }
1074 }
1075 }
1076# endif
1077
1078 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1079 return VBOXSTRICTRC_VAL(rc);
1080}
1081
1082
1083/**
1084 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1085 * \#PF access handler callback for page table pages.}
1086 *
1087 * @remarks The @a uUser argument is the index of the PGMPOOLPAGE.
1088 */
1089DECLCALLBACK(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTX pCtx,
1090 RTGCPTR pvFault, RTGCPHYS GCPhysFault, uint64_t uUser)
1091{
1092 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
1093 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1094 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1095 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1096 unsigned cMaxModifications;
1097 bool fForcedFlush = false;
1098 RT_NOREF_PV(uErrorCode);
1099
1100# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1101 AssertMsg(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT,
1102 ("pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1103# endif
1104 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1105
1106 PGM_LOCK_VOID(pVM);
1107 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1108 {
1109 /* Pool page changed while we were waiting for the lock; ignore. */
1110 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1111 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1112 PGM_UNLOCK(pVM);
1113 return VINF_SUCCESS;
1114 }
1115# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1116 if (pPage->fDirty)
1117 {
1118# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1119 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1120# endif
1121 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1122 PGM_UNLOCK(pVM);
1123 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1124 }
1125# endif
1126
1127# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1128 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1129 {
1130 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1131 void *pvGst;
1132 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1133 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1134 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1135 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1136 }
1137# endif
1138
1139# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1140 if (PGMPOOL_PAGE_IS_NESTED(pPage))
1141 {
1142 Assert(!CPUMIsGuestInVmxNonRootMode(CPUMQueryGuestCtxPtr(pVCpu)));
1143 Log7Func(("Flushing pvFault=%RGv GCPhysFault=%RGp\n", pvFault, GCPhysFault));
1144 pgmPoolMonitorChainFlush(pPool, pPage);
1145 PGM_UNLOCK(pVM);
1146 return VINF_SUCCESS;
1147 }
1148# endif
1149
1150 /*
1151 * Disassemble the faulting instruction.
1152 */
1153 PDISSTATE pDis = &pVCpu->pgm.s.Dis;
1154 int rc = EMInterpretDisasCurrent(pVCpu, pDis, NULL);
1155 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1156 {
1157 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1158 PGM_UNLOCK(pVM);
1159 return rc;
1160 }
1161
1162 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1163
1164 /*
1165 * We should ALWAYS have the list head as user parameter. This
1166 * is because we use that page to record the changes.
1167 */
1168 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1169
1170# ifdef IN_RING0
1171 /* Maximum nr of modifications depends on the page type. */
1172 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1173 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1174 cMaxModifications = 4;
1175 else
1176 cMaxModifications = 24;
1177# else
1178 cMaxModifications = 48;
1179# endif
1180
1181 /*
1182 * Incremental page table updates should weigh more than random ones.
1183 * (Only applies when started from offset 0)
1184 */
1185 pVCpu->pgm.s.cPoolAccessHandler++;
1186 if ( pPage->GCPtrLastAccessHandlerRip >= pCtx->rip - 0x40 /* observed loops in Windows 7 x64 */
1187 && pPage->GCPtrLastAccessHandlerRip < pCtx->rip + 0x40
1188 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.arch.x86.cb)
1189 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1190 {
1191 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1192 Assert(pPage->cModifications < 32000);
1193 pPage->cModifications = pPage->cModifications * 2;
1194 pPage->GCPtrLastAccessHandlerFault = pvFault;
1195 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1196 if (pPage->cModifications >= cMaxModifications)
1197 {
1198 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1199 fForcedFlush = true;
1200 }
1201 }
1202
1203 if (pPage->cModifications >= cMaxModifications)
1204 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1205
1206 /*
1207 * Check if it's worth dealing with.
1208 */
1209 bool fReused = false;
1210 bool fNotReusedNotForking = false;
1211 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1212 || pgmPoolIsPageLocked(pPage)
1213 )
1214 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pCtx, pDis, pvFault, pPage))
1215 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1216 {
1217 /*
1218 * Simple instructions, no REP prefix.
1219 */
1220 if (!(pDis->arch.x86.fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1221 {
1222 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pCtx, GCPhysFault, &fReused);
1223 if (fReused)
1224 goto flushPage;
1225
1226 /* A mov instruction to change the first page table entry will be remembered so we can detect
1227 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1228 */
1229 if ( rc == VINF_SUCCESS
1230 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1231 && pDis->pCurInstr->uOpcode == OP_MOV
1232 && (pvFault & PAGE_OFFSET_MASK) == 0)
1233 {
1234 pPage->GCPtrLastAccessHandlerFault = pvFault;
1235 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1236 pPage->GCPtrLastAccessHandlerRip = pCtx->rip;
1237 /* Make sure we don't kick out a page too quickly. */
1238 if (pPage->cModifications > 8)
1239 pPage->cModifications = 2;
1240 }
1241 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1242 {
1243 /* ignore the 2nd write to this page table entry. */
1244 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1245 }
1246 else
1247 {
1248 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1249 pPage->GCPtrLastAccessHandlerRip = 0;
1250 }
1251
1252 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1253 PGM_UNLOCK(pVM);
1254 return rc;
1255 }
1256
1257 /*
1258 * Windows is frequently doing small memset() operations (netio test 4k+).
1259 * We have to deal with these or we'll kill the cache and performance.
1260 */
1261 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1262 && !pCtx->eflags.Bits.u1DF
1263 && pDis->arch.x86.uOpMode == pDis->uCpuMode
1264 && pDis->arch.x86.uAddrMode == pDis->uCpuMode)
1265 {
1266 bool fValidStosd = false;
1267
1268 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1269 && pDis->arch.x86.fPrefix == DISPREFIX_REP
1270 && pCtx->ecx <= 0x20
1271 && pCtx->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1272 && !((uintptr_t)pvFault & 3)
1273 && (pCtx->eax == 0 || pCtx->eax == 0x80) /* the two values observed. */
1274 )
1275 {
1276 fValidStosd = true;
1277 pCtx->rcx &= 0xffffffff; /* paranoia */
1278 }
1279 else
1280 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1281 && pDis->arch.x86.fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1282 && pCtx->rcx <= 0x20
1283 && pCtx->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1284 && !((uintptr_t)pvFault & 7)
1285 && (pCtx->rax == 0 || pCtx->rax == 0x80) /* the two values observed. */
1286 )
1287 {
1288 fValidStosd = true;
1289 }
1290
1291 if (fValidStosd)
1292 {
1293 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pCtx, GCPhysFault, pvFault);
1294 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1295 PGM_UNLOCK(pVM);
1296 return rc;
1297 }
1298 }
1299
1300 /* REP prefix, don't bother. */
1301 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1302 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1303 pCtx->eax, pCtx->ecx, pCtx->edi, pCtx->esi, (RTGCPTR)pCtx->rip, pDis->pCurInstr->uOpcode, pDis->arch.x86.fPrefix));
1304 fNotReusedNotForking = true;
1305 }
1306
1307# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1308 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1309 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1310 */
1311 if ( pPage->cModifications >= cMaxModifications
1312 && !fForcedFlush
1313 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1314 && ( fNotReusedNotForking
1315 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pCtx, pDis, pvFault, pPage)
1316 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1317 )
1318 )
1319 {
1320 Assert(!pgmPoolIsPageLocked(pPage));
1321 Assert(pPage->fDirty == false);
1322
1323 /* Flush any monitored duplicates as we will disable write protection. */
1324 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1325 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1326 {
1327 PPGMPOOLPAGE pPageHead = pPage;
1328
1329 /* Find the monitor head. */
1330 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1331 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1332
1333 while (pPageHead)
1334 {
1335 unsigned idxNext = pPageHead->iMonitoredNext;
1336
1337 if (pPageHead != pPage)
1338 {
1339 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1340 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1341 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1342 AssertRC(rc2);
1343 }
1344
1345 if (idxNext == NIL_PGMPOOL_IDX)
1346 break;
1347
1348 pPageHead = &pPool->aPages[idxNext];
1349 }
1350 }
1351
1352 /* The flushing above might fail for locked pages, so double check. */
1353 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1354 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1355 {
1356 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1357
1358 /* Temporarily allow write access to the page table again. */
1359 rc = PGMHandlerPhysicalPageTempOff(pVM,
1360 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK,
1361 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1362 if (rc == VINF_SUCCESS)
1363 {
1364 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1365 AssertMsg(rc == VINF_SUCCESS
1366 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1367 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1368 || rc == VERR_PAGE_NOT_PRESENT,
1369 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1370# ifdef VBOX_STRICT
1371 pPage->GCPtrDirtyFault = pvFault;
1372# endif
1373
1374 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1375 PGM_UNLOCK(pVM);
1376 return rc;
1377 }
1378 }
1379 }
1380# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1381
1382 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1383flushPage:
1384 /*
1385 * Not worth it, so flush it.
1386 *
1387 * If we considered it to be reused, don't go back to ring-3
1388 * to emulate failed instructions since we usually cannot
1389 * interpret then. This may be a bit risky, in which case
1390 * the reuse detection must be fixed.
1391 */
1392 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pCtx, GCPhysFault);
1393 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1394 && fReused)
1395 {
1396 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1397 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1398 if (PGMShwGetPage(pVCpu, pCtx->rip, NULL, NULL) == VINF_SUCCESS)
1399 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1400 }
1401 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1402 PGM_UNLOCK(pVM);
1403 return rc;
1404}
1405
1406#endif /* !IN_RING3 */
1407
1408/**
1409 * @callback_method_impl{FNPGMPHYSHANDLER,
1410 * Access handler for shadowed page table pages.}
1411 *
1412 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1413 * @note The @a uUser argument is the index of the PGMPOOLPAGE.
1414 */
1415DECLCALLBACK(VBOXSTRICTRC)
1416pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1417 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, uint64_t uUser)
1418{
1419 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1420 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1421 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1422 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1423 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1424 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1425
1426 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1427
1428 PGM_LOCK_VOID(pVM);
1429
1430#ifdef VBOX_WITH_STATISTICS
1431 /*
1432 * Collect stats on the access.
1433 */
1434 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1435 if (cbBuf <= 16 && cbBuf > 0)
1436 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1437 else if (cbBuf >= 17 && cbBuf < 32)
1438 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1439 else if (cbBuf >= 32 && cbBuf < 64)
1440 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1441 else if (cbBuf >= 64)
1442 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1443
1444 uint8_t cbAlign;
1445 switch (pPage->enmKind)
1446 {
1447 default:
1448 cbAlign = 7;
1449 break;
1450 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1451 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1452 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1453 case PGMPOOLKIND_32BIT_PD:
1454 case PGMPOOLKIND_32BIT_PD_PHYS:
1455 cbAlign = 3;
1456 break;
1457 }
1458 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1459 if ((uint8_t)GCPhys & cbAlign)
1460 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1461#endif
1462
1463 /*
1464 * Make sure the pool page wasn't modified by a different CPU.
1465 */
1466 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1467 {
1468 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1469
1470 /* The max modification count before flushing depends on the context and page type. */
1471#ifdef IN_RING3
1472 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1473#else
1474 uint16_t cMaxModifications;
1475 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1476 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1477 cMaxModifications = 4;
1478 else
1479 cMaxModifications = 24;
1480#endif
1481
1482 /*
1483 * We don't have to be very sophisticated about this since there are relativly few calls here.
1484 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1485 */
1486 if ( ( pPage->cModifications < cMaxModifications
1487 || pgmPoolIsPageLocked(pPage) )
1488 && enmOrigin != PGMACCESSORIGIN_DEVICE
1489 && cbBuf <= 16)
1490 {
1491 /* Clear the shadow entry. */
1492 if (!pPage->cModifications++)
1493 pgmPoolMonitorModifiedInsert(pPool, pPage);
1494
1495 if (cbBuf <= 8)
1496 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1497 else
1498 {
1499 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1500 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1501 }
1502 }
1503 else
1504 pgmPoolMonitorChainFlush(pPool, pPage);
1505
1506 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1507 }
1508 else
1509 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1510 PGM_UNLOCK(pVM);
1511 return VINF_PGM_HANDLER_DO_DEFAULT;
1512}
1513
1514
1515#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1516
1517# if defined(VBOX_STRICT) && !defined(IN_RING3)
1518
1519/**
1520 * Check references to guest physical memory in a PAE / PAE page table.
1521 *
1522 * @param pPool The pool.
1523 * @param pPage The page.
1524 * @param pShwPT The shadow page table (mapping of the page).
1525 * @param pGstPT The guest page table.
1526 */
1527static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1528{
1529 unsigned cErrors = 0;
1530 int LastRc = -1; /* initialized to shut up gcc */
1531 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1532 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1533 PVMCC pVM = pPool->CTX_SUFF(pVM);
1534
1535# ifdef VBOX_STRICT
1536 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1537 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1538# endif
1539 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1540 {
1541 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1542 {
1543 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1544 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1545 if ( rc != VINF_SUCCESS
1546 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1547 {
1548 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1549 LastPTE = i;
1550 LastRc = rc;
1551 LastHCPhys = HCPhys;
1552 cErrors++;
1553
1554 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1555 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1556 AssertRC(rc);
1557
1558 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1559 {
1560 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1561
1562 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1563 {
1564 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1565
1566 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1567 {
1568 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1569 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1570 {
1571 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1572 }
1573 }
1574
1575 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1576 }
1577 }
1578 }
1579 }
1580 }
1581 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1582}
1583
1584
1585/**
1586 * Check references to guest physical memory in a PAE / 32-bit page table.
1587 *
1588 * @param pPool The pool.
1589 * @param pPage The page.
1590 * @param pShwPT The shadow page table (mapping of the page).
1591 * @param pGstPT The guest page table.
1592 */
1593static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1594{
1595 unsigned cErrors = 0;
1596 int LastRc = -1; /* initialized to shut up gcc */
1597 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1598 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1599 PVMCC pVM = pPool->CTX_SUFF(pVM);
1600
1601# ifdef VBOX_STRICT
1602 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1603 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1604# endif
1605 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1606 {
1607 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1608 {
1609 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1610 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1611 if ( rc != VINF_SUCCESS
1612 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1613 {
1614 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1615 LastPTE = i;
1616 LastRc = rc;
1617 LastHCPhys = HCPhys;
1618 cErrors++;
1619
1620 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1621 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1622 AssertRC(rc);
1623
1624 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1625 {
1626 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1627
1628 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1629 {
1630 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1631
1632 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1633 {
1634 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1635 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1636 {
1637 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1638 }
1639 }
1640
1641 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1642 }
1643 }
1644 }
1645 }
1646 }
1647 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1648}
1649
1650# endif /* VBOX_STRICT && !IN_RING3 */
1651
1652/**
1653 * Clear references to guest physical memory in a PAE / PAE page table.
1654 *
1655 * @returns nr of changed PTEs
1656 * @param pPool The pool.
1657 * @param pPage The page.
1658 * @param pShwPT The shadow page table (mapping of the page).
1659 * @param pGstPT The guest page table.
1660 * @param pOldGstPT The old cached guest page table.
1661 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1662 * @param pfFlush Flush reused page table (out)
1663 */
1664DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1665 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1666{
1667 unsigned cChanged = 0;
1668
1669# ifdef VBOX_STRICT
1670 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1671 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1672# endif
1673 *pfFlush = false;
1674
1675 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1676 {
1677 /* Check the new value written by the guest. If present and with a bogus physical address, then
1678 * it's fairly safe to assume the guest is reusing the PT.
1679 */
1680 if ( fAllowRemoval
1681 && (pGstPT->a[i].u & X86_PTE_P))
1682 {
1683 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1684 {
1685 *pfFlush = true;
1686 return ++cChanged;
1687 }
1688 }
1689 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1690 {
1691 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1692 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1693 {
1694# ifdef VBOX_STRICT
1695 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1696 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1697 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1698# endif
1699 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1700 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1701 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1702 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1703
1704 if ( uHostAttr == uGuestAttr
1705 && fHostRW <= fGuestRW)
1706 continue;
1707 }
1708 cChanged++;
1709 /* Something was changed, so flush it. */
1710 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1711 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1712 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1713 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1714 }
1715 }
1716 return cChanged;
1717}
1718
1719
1720/**
1721 * Clear references to guest physical memory in a PAE / 32-bit page table.
1722 *
1723 * @returns nr of changed PTEs
1724 * @param pPool The pool.
1725 * @param pPage The page.
1726 * @param pShwPT The shadow page table (mapping of the page).
1727 * @param pGstPT The guest page table.
1728 * @param pOldGstPT The old cached guest page table.
1729 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1730 * @param pfFlush Flush reused page table (out)
1731 */
1732DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1733 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1734{
1735 unsigned cChanged = 0;
1736
1737# ifdef VBOX_STRICT
1738 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1739 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1740# endif
1741 *pfFlush = false;
1742
1743 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1744 {
1745 /* Check the new value written by the guest. If present and with a bogus physical address, then
1746 * it's fairly safe to assume the guest is reusing the PT. */
1747 if (fAllowRemoval)
1748 {
1749 X86PGUINT const uPte = pGstPT->a[i].u;
1750 if ( (uPte & X86_PTE_P)
1751 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1752 {
1753 *pfFlush = true;
1754 return ++cChanged;
1755 }
1756 }
1757 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1758 {
1759 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1760 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1761 {
1762# ifdef VBOX_STRICT
1763 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1764 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1765 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1766# endif
1767 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1768 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1769 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1770 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1771
1772 if ( uHostAttr == uGuestAttr
1773 && fHostRW <= fGuestRW)
1774 continue;
1775 }
1776 cChanged++;
1777 /* Something was changed, so flush it. */
1778 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1779 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1780 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1781 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1782 }
1783 }
1784 return cChanged;
1785}
1786
1787
1788/**
1789 * Flush a dirty page
1790 *
1791 * @param pVM The cross context VM structure.
1792 * @param pPool The pool.
1793 * @param idxSlot Dirty array slot index
1794 * @param fAllowRemoval Allow a reused page table to be removed
1795 */
1796static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1797{
1798 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1799
1800 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1801 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1802 if (idxPage == NIL_PGMPOOL_IDX)
1803 return;
1804
1805 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1806 Assert(pPage->idx == idxPage);
1807 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1808
1809 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1810 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1811
1812 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1813 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1814 Assert(rc == VINF_SUCCESS);
1815 pPage->fDirty = false;
1816
1817# ifdef VBOX_STRICT
1818 uint64_t fFlags = 0;
1819 RTHCPHYS HCPhys;
1820 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1821 AssertMsg( ( rc == VINF_SUCCESS
1822 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1823 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1824 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1825 || rc == VERR_PAGE_NOT_PRESENT,
1826 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1827# endif
1828
1829 /* Flush those PTEs that have changed. */
1830 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1831 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1832 void *pvGst;
1833 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1834 bool fFlush;
1835 unsigned cChanges;
1836
1837 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1838 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1839 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1840 else
1841 {
1842 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1843 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1844 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1845 }
1846
1847 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1848 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1849 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1850 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1851
1852 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1853 Assert(pPage->cModifications);
1854 if (cChanges < 4)
1855 pPage->cModifications = 1; /* must use > 0 here */
1856 else
1857 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1858
1859 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1860 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1861 pPool->idxFreeDirtyPage = idxSlot;
1862
1863 pPool->cDirtyPages--;
1864 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1865 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1866 if (fFlush)
1867 {
1868 Assert(fAllowRemoval);
1869 Log(("Flush reused page table!\n"));
1870 pgmPoolFlushPage(pPool, pPage);
1871 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1872 }
1873 else
1874 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1875}
1876
1877
1878# ifndef IN_RING3
1879/**
1880 * Add a new dirty page
1881 *
1882 * @param pVM The cross context VM structure.
1883 * @param pPool The pool.
1884 * @param pPage The page.
1885 */
1886void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1887{
1888 PGM_LOCK_ASSERT_OWNER(pVM);
1889 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1890 Assert(!pPage->fDirty);
1891 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1892
1893 unsigned idxFree = pPool->idxFreeDirtyPage;
1894 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1895 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1896
1897 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1898 {
1899 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1900 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1901 }
1902 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1903 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1904
1905 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1906
1907 /*
1908 * Make a copy of the guest page table as we require valid GCPhys addresses
1909 * when removing references to physical pages.
1910 * (The HCPhys linear lookup is *extremely* expensive!)
1911 */
1912 void *pvGst;
1913 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1914 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1915 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
1916# ifdef VBOX_STRICT
1917 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1918 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1919 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1920 else
1921 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1922 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1923# endif
1924 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1925
1926 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1927 pPage->fDirty = true;
1928 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1929 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1930 pPool->cDirtyPages++;
1931
1932 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1933 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1934 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1935 {
1936 unsigned i;
1937 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1938 {
1939 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1940 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1941 {
1942 pPool->idxFreeDirtyPage = idxFree;
1943 break;
1944 }
1945 }
1946 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1947 }
1948
1949 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1950
1951 /*
1952 * Clear all references to this shadow table. See @bugref{7298}.
1953 */
1954 pgmPoolTrackClearPageUsers(pPool, pPage);
1955}
1956# endif /* !IN_RING3 */
1957
1958
1959/**
1960 * Check if the specified page is dirty (not write monitored)
1961 *
1962 * @return dirty or not
1963 * @param pVM The cross context VM structure.
1964 * @param GCPhys Guest physical address
1965 */
1966bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1967{
1968 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1969 PGM_LOCK_ASSERT_OWNER(pVM);
1970 if (!pPool->cDirtyPages)
1971 return false;
1972
1973 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1974
1975 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1976 {
1977 unsigned idxPage = pPool->aidxDirtyPages[i];
1978 if (idxPage != NIL_PGMPOOL_IDX)
1979 {
1980 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1981 if (pPage->GCPhys == GCPhys)
1982 return true;
1983 }
1984 }
1985 return false;
1986}
1987
1988
1989/**
1990 * Reset all dirty pages by reinstating page monitoring.
1991 *
1992 * @param pVM The cross context VM structure.
1993 */
1994void pgmPoolResetDirtyPages(PVMCC pVM)
1995{
1996 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1997 PGM_LOCK_ASSERT_OWNER(pVM);
1998 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1999
2000 if (!pPool->cDirtyPages)
2001 return;
2002
2003 Log(("pgmPoolResetDirtyPages\n"));
2004 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2005 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
2006
2007 pPool->idxFreeDirtyPage = 0;
2008 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2009 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2010 {
2011 unsigned i;
2012 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2013 {
2014 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2015 {
2016 pPool->idxFreeDirtyPage = i;
2017 break;
2018 }
2019 }
2020 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2021 }
2022
2023 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
2024 return;
2025}
2026
2027
2028/**
2029 * Invalidate the PT entry for the specified page
2030 *
2031 * @param pVM The cross context VM structure.
2032 * @param GCPtrPage Guest page to invalidate
2033 */
2034void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
2035{
2036 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2037 PGM_LOCK_ASSERT_OWNER(pVM);
2038 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2039
2040 if (!pPool->cDirtyPages)
2041 return;
2042
2043 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2044 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2045 {
2046 /** @todo What was intended here??? This looks incomplete... */
2047 }
2048}
2049
2050
2051/**
2052 * Reset all dirty pages by reinstating page monitoring.
2053 *
2054 * @param pVM The cross context VM structure.
2055 * @param GCPhysPT Physical address of the page table
2056 */
2057void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
2058{
2059 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2060 PGM_LOCK_ASSERT_OWNER(pVM);
2061 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2062 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2063
2064 if (!pPool->cDirtyPages)
2065 return;
2066
2067 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2068
2069 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2070 {
2071 unsigned idxPage = pPool->aidxDirtyPages[i];
2072 if (idxPage != NIL_PGMPOOL_IDX)
2073 {
2074 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2075 if (pPage->GCPhys == GCPhysPT)
2076 {
2077 idxDirtyPage = i;
2078 break;
2079 }
2080 }
2081 }
2082
2083 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2084 {
2085 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2086 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2087 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2088 {
2089 unsigned i;
2090 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2091 {
2092 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2093 {
2094 pPool->idxFreeDirtyPage = i;
2095 break;
2096 }
2097 }
2098 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2099 }
2100 }
2101}
2102
2103#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2104
2105/**
2106 * Inserts a page into the GCPhys hash table.
2107 *
2108 * @param pPool The pool.
2109 * @param pPage The page.
2110 */
2111DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2112{
2113 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2114 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2115 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2116 pPage->iNext = pPool->aiHash[iHash];
2117 pPool->aiHash[iHash] = pPage->idx;
2118}
2119
2120
2121/**
2122 * Removes a page from the GCPhys hash table.
2123 *
2124 * @param pPool The pool.
2125 * @param pPage The page.
2126 */
2127DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2128{
2129 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2130 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2131 if (pPool->aiHash[iHash] == pPage->idx)
2132 pPool->aiHash[iHash] = pPage->iNext;
2133 else
2134 {
2135 uint16_t iPrev = pPool->aiHash[iHash];
2136 for (;;)
2137 {
2138 const int16_t i = pPool->aPages[iPrev].iNext;
2139 if (i == pPage->idx)
2140 {
2141 pPool->aPages[iPrev].iNext = pPage->iNext;
2142 break;
2143 }
2144 if (i == NIL_PGMPOOL_IDX)
2145 {
2146 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2147 break;
2148 }
2149 iPrev = i;
2150 }
2151 }
2152 pPage->iNext = NIL_PGMPOOL_IDX;
2153}
2154
2155
2156/**
2157 * Frees up one cache page.
2158 *
2159 * @returns VBox status code.
2160 * @retval VINF_SUCCESS on success.
2161 * @param pPool The pool.
2162 * @param iUser The user index.
2163 */
2164static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2165{
2166 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2167 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
2168 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2169
2170 /*
2171 * Select one page from the tail of the age list.
2172 */
2173 PPGMPOOLPAGE pPage;
2174 for (unsigned iLoop = 0; ; iLoop++)
2175 {
2176 uint16_t iToFree = pPool->iAgeTail;
2177 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2178 iToFree = pPool->aPages[iToFree].iAgePrev;
2179/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2180 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2181 {
2182 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2183 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2184 {
2185 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2186 continue;
2187 iToFree = i;
2188 break;
2189 }
2190 }
2191*/
2192 Assert(iToFree != iUser);
2193 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2194 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2195 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2196 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2197 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2198 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2199
2200 pPage = &pPool->aPages[iToFree];
2201
2202 /*
2203 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2204 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2205 */
2206 if ( !pgmPoolIsPageLocked(pPage)
2207 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2208 break;
2209 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2210 pgmPoolCacheUsed(pPool, pPage);
2211 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2212 }
2213
2214 /*
2215 * Found a usable page, flush it and return.
2216 */
2217 int rc = pgmPoolFlushPage(pPool, pPage);
2218 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2219 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2220 if (rc == VINF_SUCCESS)
2221 PGM_INVL_ALL_VCPU_TLBS(pVM);
2222 return rc;
2223}
2224
2225
2226/**
2227 * Checks if a kind mismatch is really a page being reused
2228 * or if it's just normal remappings.
2229 *
2230 * @returns true if reused and the cached page (enmKind1) should be flushed
2231 * @returns false if not reused.
2232 * @param enmKind1 The kind of the cached page.
2233 * @param enmKind2 The kind of the requested page.
2234 */
2235static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2236{
2237 switch (enmKind1)
2238 {
2239 /*
2240 * Never reuse them. There is no remapping in non-paging mode.
2241 */
2242 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2243 case PGMPOOLKIND_32BIT_PD_PHYS:
2244 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2245 case PGMPOOLKIND_PAE_PD_PHYS:
2246 case PGMPOOLKIND_PAE_PDPT_PHYS:
2247 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2248 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2249 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2250 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2251 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2252 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2253 return false;
2254
2255 /*
2256 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2257 */
2258 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2259 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2260 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2261 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2262 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2263 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2264 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2265 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2266 case PGMPOOLKIND_32BIT_PD:
2267 case PGMPOOLKIND_PAE_PDPT:
2268 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2269 switch (enmKind2)
2270 {
2271 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2272 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2273 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2274 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2275 case PGMPOOLKIND_64BIT_PML4:
2276 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2277 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2278 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2279 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2280 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2281 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2282 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2283 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2284 return true;
2285 default:
2286 return false;
2287 }
2288
2289 /*
2290 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2291 */
2292 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2293 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2294 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2295 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2296 case PGMPOOLKIND_64BIT_PML4:
2297 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2298 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2299 switch (enmKind2)
2300 {
2301 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2302 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2303 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2304 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2305 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2306 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2307 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2308 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2309 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2310 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2311 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2312 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2313 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2314 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2315 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2316 return true;
2317 default:
2318 return false;
2319 }
2320
2321#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2322 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2323 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2324 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2325 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2326 return PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2);
2327
2328 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2329 return false;
2330#endif
2331
2332 /*
2333 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2334 */
2335 case PGMPOOLKIND_ROOT_NESTED:
2336 return false;
2337
2338 default:
2339 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2340 }
2341}
2342
2343
2344/**
2345 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2346 *
2347 * @returns VBox status code.
2348 * @retval VINF_PGM_CACHED_PAGE on success.
2349 * @retval VERR_FILE_NOT_FOUND if not found.
2350 * @param pPool The pool.
2351 * @param GCPhys The GC physical address of the page we're gonna shadow.
2352 * @param enmKind The kind of mapping.
2353 * @param enmAccess Access type for the mapping (only relevant for big pages)
2354 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2355 * @param iUser The shadow page pool index of the user table. This is
2356 * NIL_PGMPOOL_IDX for root pages.
2357 * @param iUserTable The index into the user table (shadowed). Ignored if
2358 * root page
2359 * @param ppPage Where to store the pointer to the page.
2360 */
2361static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2362 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2363{
2364 /*
2365 * Look up the GCPhys in the hash.
2366 */
2367 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2368 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2369 if (i != NIL_PGMPOOL_IDX)
2370 {
2371 do
2372 {
2373 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2374 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2375 if (pPage->GCPhys == GCPhys)
2376 {
2377 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2378 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2379 && pPage->fA20Enabled == fA20Enabled)
2380 {
2381 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2382 * doesn't flush it in case there are no more free use records.
2383 */
2384 pgmPoolCacheUsed(pPool, pPage);
2385
2386 int rc = VINF_SUCCESS;
2387 if (iUser != NIL_PGMPOOL_IDX)
2388 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2389 if (RT_SUCCESS(rc))
2390 {
2391 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2392 *ppPage = pPage;
2393 if (pPage->cModifications)
2394 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2395 STAM_COUNTER_INC(&pPool->StatCacheHits);
2396 return VINF_PGM_CACHED_PAGE;
2397 }
2398 return rc;
2399 }
2400
2401 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2402 {
2403 /*
2404 * The kind is different. In some cases we should now flush the page
2405 * as it has been reused, but in most cases this is normal remapping
2406 * of PDs as PT or big pages using the GCPhys field in a slightly
2407 * different way than the other kinds.
2408 */
2409 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2410 {
2411 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2412 pgmPoolFlushPage(pPool, pPage);
2413 break;
2414 }
2415 }
2416 }
2417
2418 /* next */
2419 i = pPage->iNext;
2420 } while (i != NIL_PGMPOOL_IDX);
2421 }
2422
2423 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2424 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2425 return VERR_FILE_NOT_FOUND;
2426}
2427
2428
2429/**
2430 * Inserts a page into the cache.
2431 *
2432 * @param pPool The pool.
2433 * @param pPage The cached page.
2434 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2435 */
2436static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2437{
2438 /*
2439 * Insert into the GCPhys hash if the page is fit for that.
2440 */
2441 Assert(!pPage->fCached);
2442 if (fCanBeCached)
2443 {
2444 pPage->fCached = true;
2445 pgmPoolHashInsert(pPool, pPage);
2446 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2447 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2448 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2449 }
2450 else
2451 {
2452 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2453 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2454 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2455 }
2456
2457 /*
2458 * Insert at the head of the age list.
2459 */
2460 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2461 pPage->iAgeNext = pPool->iAgeHead;
2462 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2463 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2464 else
2465 pPool->iAgeTail = pPage->idx;
2466 pPool->iAgeHead = pPage->idx;
2467}
2468
2469
2470/**
2471 * Flushes a cached page.
2472 *
2473 * @param pPool The pool.
2474 * @param pPage The cached page.
2475 */
2476static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2477{
2478 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2479
2480 /*
2481 * Remove the page from the hash.
2482 */
2483 if (pPage->fCached)
2484 {
2485 pPage->fCached = false;
2486 pgmPoolHashRemove(pPool, pPage);
2487 }
2488 else
2489 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2490
2491 /*
2492 * Remove it from the age list.
2493 */
2494 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2495 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2496 else
2497 pPool->iAgeTail = pPage->iAgePrev;
2498 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2499 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2500 else
2501 pPool->iAgeHead = pPage->iAgeNext;
2502 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2503 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2504}
2505
2506
2507/**
2508 * Looks for pages sharing the monitor.
2509 *
2510 * @returns Pointer to the head page.
2511 * @returns NULL if not found.
2512 * @param pPool The Pool
2513 * @param pNewPage The page which is going to be monitored.
2514 */
2515static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2516{
2517 /*
2518 * Look up the GCPhys in the hash.
2519 */
2520 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2521 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2522 if (i == NIL_PGMPOOL_IDX)
2523 return NULL;
2524 do
2525 {
2526 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2527 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2528 && pPage != pNewPage)
2529 {
2530 switch (pPage->enmKind)
2531 {
2532 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2533 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2534 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2535 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2536 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2537 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2538 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2539 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2540 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2541 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2542 case PGMPOOLKIND_64BIT_PML4:
2543 case PGMPOOLKIND_32BIT_PD:
2544 case PGMPOOLKIND_PAE_PDPT:
2545#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2546 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2547 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2548 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2549#endif
2550 {
2551 /* find the head */
2552 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2553 {
2554 Assert(pPage->iMonitoredPrev != pPage->idx);
2555 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2556 }
2557 return pPage;
2558 }
2559
2560 /* ignore, no monitoring. */
2561 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2562 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2563 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2564 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2565 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2566 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2567 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2568 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2569 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2570 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2571 case PGMPOOLKIND_ROOT_NESTED:
2572 case PGMPOOLKIND_PAE_PD_PHYS:
2573 case PGMPOOLKIND_PAE_PDPT_PHYS:
2574 case PGMPOOLKIND_32BIT_PD_PHYS:
2575 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2576#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2577 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2578 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2579#endif
2580 break;
2581 default:
2582 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2583 }
2584 }
2585
2586 /* next */
2587 i = pPage->iNext;
2588 } while (i != NIL_PGMPOOL_IDX);
2589 return NULL;
2590}
2591
2592
2593/**
2594 * Enabled write monitoring of a guest page.
2595 *
2596 * @returns VBox status code.
2597 * @retval VINF_SUCCESS on success.
2598 * @param pPool The pool.
2599 * @param pPage The cached page.
2600 */
2601static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2602{
2603 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2604
2605 /*
2606 * Filter out the relevant kinds.
2607 */
2608 switch (pPage->enmKind)
2609 {
2610 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2611 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2612 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2613 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2614 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2615 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2616 case PGMPOOLKIND_64BIT_PML4:
2617 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2618 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2619 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2620 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2621 case PGMPOOLKIND_32BIT_PD:
2622 case PGMPOOLKIND_PAE_PDPT:
2623 break;
2624
2625 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2626 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2627 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2628 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2629 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2630 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2631 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2632 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2633 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2634 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2635 case PGMPOOLKIND_ROOT_NESTED:
2636 /* Nothing to monitor here. */
2637 return VINF_SUCCESS;
2638
2639 case PGMPOOLKIND_32BIT_PD_PHYS:
2640 case PGMPOOLKIND_PAE_PDPT_PHYS:
2641 case PGMPOOLKIND_PAE_PD_PHYS:
2642 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2643 /* Nothing to monitor here. */
2644 return VINF_SUCCESS;
2645
2646#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2647 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2648 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2649 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2650 break;
2651
2652 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2653 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2654 /* Nothing to monitor here. */
2655 return VINF_SUCCESS;
2656#endif
2657
2658 default:
2659 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2660 }
2661
2662 /*
2663 * Install handler.
2664 */
2665 int rc;
2666 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2667 if (pPageHead)
2668 {
2669 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2670 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2671
2672#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2673 if (pPageHead->fDirty)
2674 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2675#endif
2676
2677 pPage->iMonitoredPrev = pPageHead->idx;
2678 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2679 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2680 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2681 pPageHead->iMonitoredNext = pPage->idx;
2682 rc = VINF_SUCCESS;
2683 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2684 Log7Func(("Adding to monitoring list GCPhysPage=%RGp\n", pPage->GCPhys));
2685 }
2686 else
2687 {
2688 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2689 Log7Func(("Started monitoring GCPhysPage=%RGp HCPhys=%RHp enmKind=%s\n", pPage->GCPhys, pPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
2690
2691 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2692 PVMCC pVM = pPool->CTX_SUFF(pVM);
2693 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2694 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2695 pPage - &pPool->aPages[0], NIL_RTR3PTR /*pszDesc*/);
2696 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2697 * the heap size should suffice. */
2698 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2699 PVMCPU pVCpu = VMMGetCpu(pVM);
2700 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2701 }
2702 pPage->fMonitored = true;
2703 return rc;
2704}
2705
2706
2707/**
2708 * Disables write monitoring of a guest page.
2709 *
2710 * @returns VBox status code.
2711 * @retval VINF_SUCCESS on success.
2712 * @param pPool The pool.
2713 * @param pPage The cached page.
2714 */
2715static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2716{
2717 /*
2718 * Filter out the relevant kinds.
2719 */
2720 switch (pPage->enmKind)
2721 {
2722 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2723 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2724 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2725 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2726 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2727 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2728 case PGMPOOLKIND_64BIT_PML4:
2729 case PGMPOOLKIND_32BIT_PD:
2730 case PGMPOOLKIND_PAE_PDPT:
2731 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2732 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2733 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2734 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2735 break;
2736
2737 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2738 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2739 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2740 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2741 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2742 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2743 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2744 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2745 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2746 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2747 case PGMPOOLKIND_ROOT_NESTED:
2748 case PGMPOOLKIND_PAE_PD_PHYS:
2749 case PGMPOOLKIND_PAE_PDPT_PHYS:
2750 case PGMPOOLKIND_32BIT_PD_PHYS:
2751 /* Nothing to monitor here. */
2752 Assert(!pPage->fMonitored);
2753 return VINF_SUCCESS;
2754
2755#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2756 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2757 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2758 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2759 break;
2760
2761 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2762 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2763 /* Nothing to monitor here. */
2764 Assert(!pPage->fMonitored);
2765 return VINF_SUCCESS;
2766#endif
2767
2768 default:
2769 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2770 }
2771 Assert(pPage->fMonitored);
2772
2773 /*
2774 * Remove the page from the monitored list or uninstall it if last.
2775 */
2776 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2777 int rc;
2778 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2779 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2780 {
2781 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2782 {
2783 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2784 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2785 rc = PGMHandlerPhysicalChangeUserArg(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, pPage->iMonitoredNext);
2786
2787 AssertFatalRCSuccess(rc);
2788 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2789 }
2790 else
2791 {
2792 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2793 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2794 {
2795 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2796 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2797 }
2798 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2799 rc = VINF_SUCCESS;
2800 }
2801 }
2802 else
2803 {
2804 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2805 AssertFatalRC(rc);
2806 PVMCPU pVCpu = VMMGetCpu(pVM);
2807 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2808 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2809 }
2810 pPage->fMonitored = false;
2811
2812 /*
2813 * Remove it from the list of modified pages (if in it).
2814 */
2815 pgmPoolMonitorModifiedRemove(pPool, pPage);
2816
2817 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2818 Log7Func(("Stopped monitoring %RGp\n", pPage->GCPhys));
2819
2820 return rc;
2821}
2822
2823
2824/**
2825 * Inserts the page into the list of modified pages.
2826 *
2827 * @param pPool The pool.
2828 * @param pPage The page.
2829 */
2830void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2831{
2832 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2833 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2834 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2835 && pPool->iModifiedHead != pPage->idx,
2836 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2837 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2838 pPool->iModifiedHead, pPool->cModifiedPages));
2839
2840 pPage->iModifiedNext = pPool->iModifiedHead;
2841 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2842 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2843 pPool->iModifiedHead = pPage->idx;
2844 pPool->cModifiedPages++;
2845#ifdef VBOX_WITH_STATISTICS
2846 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2847 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2848#endif
2849}
2850
2851
2852/**
2853 * Removes the page from the list of modified pages and resets the
2854 * modification counter.
2855 *
2856 * @param pPool The pool.
2857 * @param pPage The page which is believed to be in the list of modified pages.
2858 */
2859static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2860{
2861 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2862 if (pPool->iModifiedHead == pPage->idx)
2863 {
2864 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2865 pPool->iModifiedHead = pPage->iModifiedNext;
2866 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2867 {
2868 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2869 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2870 }
2871 pPool->cModifiedPages--;
2872 }
2873 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2874 {
2875 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2876 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2877 {
2878 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2879 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2880 }
2881 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2882 pPool->cModifiedPages--;
2883 }
2884 else
2885 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2886 pPage->cModifications = 0;
2887}
2888
2889
2890/**
2891 * Zaps the list of modified pages, resetting their modification counters in the process.
2892 *
2893 * @param pVM The cross context VM structure.
2894 */
2895static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2896{
2897 PGM_LOCK_VOID(pVM);
2898 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2899 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2900
2901 unsigned cPages = 0; NOREF(cPages);
2902
2903#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2904 pgmPoolResetDirtyPages(pVM);
2905#endif
2906
2907 uint16_t idx = pPool->iModifiedHead;
2908 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2909 while (idx != NIL_PGMPOOL_IDX)
2910 {
2911 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2912 idx = pPage->iModifiedNext;
2913 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2914 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2915 pPage->cModifications = 0;
2916 Assert(++cPages);
2917 }
2918 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2919 pPool->cModifiedPages = 0;
2920 PGM_UNLOCK(pVM);
2921}
2922
2923
2924/**
2925 * Handle SyncCR3 pool tasks
2926 *
2927 * @returns VBox status code.
2928 * @retval VINF_SUCCESS if successfully added.
2929 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
2930 * @param pVCpu The cross context virtual CPU structure.
2931 * @remark Should only be used when monitoring is available, thus placed in
2932 * the PGMPOOL_WITH_MONITORING \#ifdef.
2933 */
2934int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2935{
2936 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2937 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2938
2939 /*
2940 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2941 * Occasionally we will have to clear all the shadow page tables because we wanted
2942 * to monitor a page which was mapped by too many shadowed page tables. This operation
2943 * sometimes referred to as a 'lightweight flush'.
2944 */
2945# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2946 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2947 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2948# else /* !IN_RING3 */
2949 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2950 {
2951 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2952 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2953
2954 /* Make sure all other VCPUs return to ring 3. */
2955 if (pVM->cCpus > 1)
2956 {
2957 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2958 PGM_INVL_ALL_VCPU_TLBS(pVM);
2959 }
2960 return VINF_PGM_SYNC_CR3;
2961 }
2962# endif /* !IN_RING3 */
2963 else
2964 {
2965 pgmPoolMonitorModifiedClearAll(pVM);
2966
2967 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2968 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2969 {
2970 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2971 return pgmPoolSyncCR3(pVCpu);
2972 }
2973 }
2974 return VINF_SUCCESS;
2975}
2976
2977
2978/**
2979 * Frees up at least one user entry.
2980 *
2981 * @returns VBox status code.
2982 * @retval VINF_SUCCESS if successfully added.
2983 *
2984 * @param pPool The pool.
2985 * @param iUser The user index.
2986 */
2987static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2988{
2989 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2990 /*
2991 * Just free cached pages in a braindead fashion.
2992 */
2993 /** @todo walk the age list backwards and free the first with usage. */
2994 int rc = VINF_SUCCESS;
2995 do
2996 {
2997 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2998 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2999 rc = rc2;
3000 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
3001 return rc;
3002}
3003
3004
3005/**
3006 * Inserts a page into the cache.
3007 *
3008 * This will create user node for the page, insert it into the GCPhys
3009 * hash, and insert it into the age list.
3010 *
3011 * @returns VBox status code.
3012 * @retval VINF_SUCCESS if successfully added.
3013 *
3014 * @param pPool The pool.
3015 * @param pPage The cached page.
3016 * @param GCPhys The GC physical address of the page we're gonna shadow.
3017 * @param iUser The user index.
3018 * @param iUserTable The user table index.
3019 */
3020DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
3021{
3022 int rc = VINF_SUCCESS;
3023 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3024
3025 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
3026
3027 if (iUser != NIL_PGMPOOL_IDX)
3028 {
3029#ifdef VBOX_STRICT
3030 /*
3031 * Check that the entry doesn't already exists.
3032 */
3033 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3034 {
3035 uint16_t i = pPage->iUserHead;
3036 do
3037 {
3038 Assert(i < pPool->cMaxUsers);
3039 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3040 i = paUsers[i].iNext;
3041 } while (i != NIL_PGMPOOL_USER_INDEX);
3042 }
3043#endif
3044
3045 /*
3046 * Find free a user node.
3047 */
3048 uint16_t i = pPool->iUserFreeHead;
3049 if (i == NIL_PGMPOOL_USER_INDEX)
3050 {
3051 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3052 if (RT_FAILURE(rc))
3053 return rc;
3054 i = pPool->iUserFreeHead;
3055 }
3056
3057 /*
3058 * Unlink the user node from the free list,
3059 * initialize and insert it into the user list.
3060 */
3061 pPool->iUserFreeHead = paUsers[i].iNext;
3062 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
3063 paUsers[i].iUser = iUser;
3064 paUsers[i].iUserTable = iUserTable;
3065 pPage->iUserHead = i;
3066 }
3067 else
3068 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3069
3070
3071 /*
3072 * Insert into cache and enable monitoring of the guest page if enabled.
3073 *
3074 * Until we implement caching of all levels, including the CR3 one, we'll
3075 * have to make sure we don't try monitor & cache any recursive reuse of
3076 * a monitored CR3 page. Because all windows versions are doing this we'll
3077 * have to be able to do combined access monitoring, CR3 + PT and
3078 * PD + PT (guest PAE).
3079 *
3080 * Update:
3081 * We're now cooperating with the CR3 monitor if an uncachable page is found.
3082 */
3083 const bool fCanBeMonitored = true;
3084 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
3085 if (fCanBeMonitored)
3086 {
3087 rc = pgmPoolMonitorInsert(pPool, pPage);
3088 AssertRC(rc);
3089 }
3090 return rc;
3091}
3092
3093
3094/**
3095 * Adds a user reference to a page.
3096 *
3097 * This will move the page to the head of the
3098 *
3099 * @returns VBox status code.
3100 * @retval VINF_SUCCESS if successfully added.
3101 *
3102 * @param pPool The pool.
3103 * @param pPage The cached page.
3104 * @param iUser The user index.
3105 * @param iUserTable The user table.
3106 */
3107static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3108{
3109 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3110 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3111 Assert(iUser != NIL_PGMPOOL_IDX);
3112
3113# ifdef VBOX_STRICT
3114 /*
3115 * Check that the entry doesn't already exists. We only allow multiple
3116 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3117 */
3118 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3119 {
3120 uint16_t i = pPage->iUserHead;
3121 do
3122 {
3123 Assert(i < pPool->cMaxUsers);
3124 /** @todo this assertion looks odd... Shouldn't it be && here? */
3125 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3126 i = paUsers[i].iNext;
3127 } while (i != NIL_PGMPOOL_USER_INDEX);
3128 }
3129# endif
3130
3131 /*
3132 * Allocate a user node.
3133 */
3134 uint16_t i = pPool->iUserFreeHead;
3135 if (i == NIL_PGMPOOL_USER_INDEX)
3136 {
3137 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3138 if (RT_FAILURE(rc))
3139 return rc;
3140 i = pPool->iUserFreeHead;
3141 }
3142 pPool->iUserFreeHead = paUsers[i].iNext;
3143
3144 /*
3145 * Initialize the user node and insert it.
3146 */
3147 paUsers[i].iNext = pPage->iUserHead;
3148 paUsers[i].iUser = iUser;
3149 paUsers[i].iUserTable = iUserTable;
3150 pPage->iUserHead = i;
3151
3152# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3153 if (pPage->fDirty)
3154 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3155# endif
3156
3157 /*
3158 * Tell the cache to update its replacement stats for this page.
3159 */
3160 pgmPoolCacheUsed(pPool, pPage);
3161 return VINF_SUCCESS;
3162}
3163
3164
3165/**
3166 * Frees a user record associated with a page.
3167 *
3168 * This does not clear the entry in the user table, it simply replaces the
3169 * user record to the chain of free records.
3170 *
3171 * @param pPool The pool.
3172 * @param pPage The shadow page.
3173 * @param iUser The shadow page pool index of the user table.
3174 * @param iUserTable The index into the user table (shadowed).
3175 *
3176 * @remarks Don't call this for root pages.
3177 */
3178static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3179{
3180 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3181 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3182 Assert(iUser != NIL_PGMPOOL_IDX);
3183
3184 /*
3185 * Unlink and free the specified user entry.
3186 */
3187
3188 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3189 uint16_t i = pPage->iUserHead;
3190 if ( i != NIL_PGMPOOL_USER_INDEX
3191 && paUsers[i].iUser == iUser
3192 && paUsers[i].iUserTable == iUserTable)
3193 {
3194 pPage->iUserHead = paUsers[i].iNext;
3195
3196 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3197 paUsers[i].iNext = pPool->iUserFreeHead;
3198 pPool->iUserFreeHead = i;
3199 return;
3200 }
3201
3202 /* General: Linear search. */
3203 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3204 while (i != NIL_PGMPOOL_USER_INDEX)
3205 {
3206 if ( paUsers[i].iUser == iUser
3207 && paUsers[i].iUserTable == iUserTable)
3208 {
3209 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3210 paUsers[iPrev].iNext = paUsers[i].iNext;
3211 else
3212 pPage->iUserHead = paUsers[i].iNext;
3213
3214 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3215 paUsers[i].iNext = pPool->iUserFreeHead;
3216 pPool->iUserFreeHead = i;
3217 return;
3218 }
3219 iPrev = i;
3220 i = paUsers[i].iNext;
3221 }
3222
3223 /* Fatal: didn't find it */
3224 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3225 iUser, iUserTable, pPage->GCPhys));
3226}
3227
3228
#if 0 /* unused */
/**
 * Gets the entry size of a shadow table.
 *
 * @returns The size of the entry in bytes. That is, 4 or 8.  A kind that is
 *          not listed below triggers a fatal assertion instead.
 * @param   enmKind     The kind of page.
 */
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* Kinds reported with 4 byte entries. */
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_32BIT_PD:
        case PGMPOOLKIND_32BIT_PD_PHYS:
            return 4;

        /* Kinds reported with 8 byte entries. */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_PAE_PDPT:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PML4:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            return 8;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
#endif /* unused */
3279
#if 0 /* unused */
/**
 * Gets the entry size of a guest table.
 *
 * @returns The size of the entry in bytes. That is, 0, 4 or 8.  Zero is
 *          returned, after a (non-fatal) assertion, for the kinds in the
 *          third group below.  A kind that is not listed at all triggers a
 *          fatal assertion.
 * @param   enmKind     The kind of page.
 */
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* Kinds reported with 4 byte guest entries. */
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PD:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
            return 4;

        /* Kinds reported with 8 byte guest entries. */
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PDPT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4:
            return 8;

        /* Kinds for which the function asserts and reports 0. */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_32BIT_PD_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /** @todo can we return 0? (nobody is calling this...) */
            AssertFailed();
            return 0;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
#endif /* unused */
3334
3335
3336/**
3337 * Checks one shadow page table entry for a mapping of a physical page.
3338 *
3339 * @returns true / false indicating removal of all relevant PTEs
3340 *
3341 * @param pVM The cross context VM structure.
3342 * @param pPhysPage The guest page in question.
3343 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3344 * @param iShw The shadow page table.
3345 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3346 */
3347static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3348{
3349 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3350 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3351 bool fRet = false;
3352
3353 /*
3354 * Assert sanity.
3355 */
3356 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3357 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3358 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3359
3360 /*
3361 * Then, clear the actual mappings to the page in the shadow PT.
3362 */
3363 switch (pPage->enmKind)
3364 {
3365 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3366 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3367 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3368 {
3369 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3370 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3371 uint32_t u32AndMask = 0;
3372 uint32_t u32OrMask = 0;
3373
3374 if (!fFlushPTEs)
3375 {
3376 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3377 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3378 {
3379 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3380 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3381 u32OrMask = X86_PTE_RW;
3382 u32AndMask = UINT32_MAX;
3383 fRet = true;
3384 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3385 break;
3386
3387 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3388 u32OrMask = 0;
3389 u32AndMask = ~X86_PTE_RW;
3390 fRet = true;
3391 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3392 break;
3393 default:
3394 /* We will end up here when called with an "ALL" access handler. */
3395 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3396 break;
3397 }
3398 }
3399 else
3400 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3401
3402 /* Update the counter if we're removing references. */
3403 if (!u32AndMask)
3404 {
3405 Assert(pPage->cPresent);
3406 Assert(pPool->cPresent);
3407 pPage->cPresent--;
3408 pPool->cPresent--;
3409 }
3410
3411 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3412 {
3413 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3414 X86PTE Pte;
3415 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3416 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3417 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3418
3419 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3420 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3421 return fRet;
3422 }
3423#ifdef LOG_ENABLED
3424 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3425 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3426 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3427 {
3428 Log(("i=%d cFound=%d\n", i, ++cFound));
3429 }
3430#endif
3431 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3432 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3433 break;
3434 }
3435
3436 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3437 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3438 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3439 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3440 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3441 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3442#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
3443 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
3444# ifdef PGM_WITH_LARGE_PAGES
3445 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
3446# endif
3447#endif
3448 {
3449 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3450 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3451 uint64_t u64OrMask = 0;
3452 uint64_t u64AndMask = 0;
3453
3454 if (!fFlushPTEs)
3455 {
3456 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3457 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3458 {
3459 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3460 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3461 u64OrMask = X86_PTE_RW;
3462 u64AndMask = UINT64_MAX;
3463 fRet = true;
3464 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3465 break;
3466
3467 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3468 u64OrMask = 0;
3469 u64AndMask = ~(uint64_t)X86_PTE_RW;
3470 fRet = true;
3471 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3472 break;
3473
3474 default:
3475 /* We will end up here when called with an "ALL" access handler. */
3476 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3477 break;
3478 }
3479 }
3480 else
3481 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3482
3483 /* Update the counter if we're removing references. */
3484 if (!u64AndMask)
3485 {
3486 Assert(pPage->cPresent);
3487 Assert(pPool->cPresent);
3488 pPage->cPresent--;
3489 pPool->cPresent--;
3490 }
3491
3492 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3493 {
3494 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3495 X86PTEPAE Pte;
3496 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3497 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3498 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3499
3500 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3501 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3502 return fRet;
3503 }
3504#ifdef LOG_ENABLED
3505 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3506 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3507 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3508 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3509 Log(("i=%d cFound=%d\n", i, ++cFound));
3510#endif
3511 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3512 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3513 break;
3514 }
3515
3516#ifdef PGM_WITH_LARGE_PAGES
3517 /* Large page case only. */
3518 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3519 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
3520 {
3521 Assert(pVM->pgm.s.fNestedPaging);
3522
3523 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3524 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3525
3526 Assert( pPage->enmKind != PGMPOOLKIND_EPT_PD_FOR_EPT_PD
3527 || (pPD->a[iPte].u & EPT_E_LEAF));
3528
3529 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3530 {
3531 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3532 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3533 pPD->a[iPte].u = 0;
3534 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3535
3536 /* Update the counter as we're removing references. */
3537 Assert(pPage->cPresent);
3538 Assert(pPool->cPresent);
3539 pPage->cPresent--;
3540 pPool->cPresent--;
3541
3542 return fRet;
3543 }
3544# ifdef LOG_ENABLED
3545 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3546 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3547 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3548 Log(("i=%d cFound=%d\n", i, ++cFound));
3549# endif
3550 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3551 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3552 break;
3553 }
3554
3555 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3556 case PGMPOOLKIND_PAE_PD_PHYS:
3557 {
3558 Assert(pVM->pgm.s.fNestedPaging);
3559
3560 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3561 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3562
3563 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3564 {
3565 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3566 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3567 pPD->a[iPte].u = 0;
3568 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3569
3570 /* Update the counter as we're removing references. */
3571 Assert(pPage->cPresent);
3572 Assert(pPool->cPresent);
3573 pPage->cPresent--;
3574 pPool->cPresent--;
3575 return fRet;
3576 }
3577# ifdef LOG_ENABLED
3578 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3579 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3580 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3581 Log(("i=%d cFound=%d\n", i, ++cFound));
3582# endif
3583 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3584 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3585 break;
3586 }
3587#endif /* PGM_WITH_LARGE_PAGES */
3588
3589 default:
3590 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3591 }
3592
3593 /* not reached. */
3594#ifndef _MSC_VER
3595 return fRet;
3596#endif
3597}
3598
3599
3600/**
3601 * Scans one shadow page table for mappings of a physical page.
3602 *
3603 * @param pVM The cross context VM structure.
3604 * @param pPhysPage The guest page in question.
3605 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3606 * @param iShw The shadow page table.
3607 */
3608static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3609{
3610 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3611
3612 /* We should only come here with when there's only one reference to this physical page. */
3613 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3614
3615 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3616 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3617 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3618 if (!fKeptPTEs)
3619 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3620 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3621}
3622
3623
3624/**
3625 * Flushes a list of shadow page tables mapping the same physical page.
3626 *
3627 * @param pVM The cross context VM structure.
3628 * @param pPhysPage The guest page in question.
3629 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3630 * @param iPhysExt The physical cross reference extent list to flush.
3631 */
3632static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3633{
3634 PGM_LOCK_ASSERT_OWNER(pVM);
3635 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3636 bool fKeepList = false;
3637
3638 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3639 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3640
3641 const uint16_t iPhysExtStart = iPhysExt;
3642 PPGMPOOLPHYSEXT pPhysExt;
3643 do
3644 {
3645 Assert(iPhysExt < pPool->cMaxPhysExts);
3646 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3647 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3648 {
3649 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3650 {
3651 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3652 if (!fKeptPTEs)
3653 {
3654 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3655 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3656 }
3657 else
3658 fKeepList = true;
3659 }
3660 }
3661 /* next */
3662 iPhysExt = pPhysExt->iNext;
3663 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3664
3665 if (!fKeepList)
3666 {
3667 /* insert the list into the free list and clear the ram range entry. */
3668 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3669 pPool->iPhysExtFreeHead = iPhysExtStart;
3670 /* Invalidate the tracking data. */
3671 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3672 }
3673
3674 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3675}
3676
3677
3678/**
3679 * Flushes all shadow page table mappings of the given guest page.
3680 *
3681 * This is typically called when the host page backing the guest one has been
3682 * replaced or when the page protection was changed due to a guest access
3683 * caught by the monitoring.
3684 *
3685 * @returns VBox status code.
3686 * @retval VINF_SUCCESS if all references has been successfully cleared.
3687 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3688 * pool cleaning. FF and sync flags are set.
3689 *
3690 * @param pVM The cross context VM structure.
3691 * @param GCPhysPage GC physical address of the page in question
3692 * @param pPhysPage The guest page in question.
3693 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3694 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3695 * flushed, it is NOT touched if this isn't necessary.
3696 * The caller MUST initialized this to @a false.
3697 */
3698int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3699{
3700 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3701 PGM_LOCK_VOID(pVM);
3702 int rc = VINF_SUCCESS;
3703
3704#ifdef PGM_WITH_LARGE_PAGES
3705 /* Is this page part of a large page? */
3706 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3707 {
3708 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3709 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3710
3711 /* Fetch the large page base. */
3712 PPGMPAGE pLargePage;
3713 if (GCPhysBase != GCPhysPage)
3714 {
3715 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3716 AssertFatal(pLargePage);
3717 }
3718 else
3719 pLargePage = pPhysPage;
3720
3721 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3722
3723 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3724 {
3725 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3726 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3727 pVM->pgm.s.cLargePagesDisabled++;
3728
3729 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3730 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3731
3732 *pfFlushTLBs = true;
3733 PGM_UNLOCK(pVM);
3734 return rc;
3735 }
3736 }
3737#else
3738 NOREF(GCPhysPage);
3739#endif /* PGM_WITH_LARGE_PAGES */
3740
3741 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3742 if (u16)
3743 {
3744 /*
3745 * The zero page is currently screwing up the tracking and we'll
3746 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3747 * is defined, zero pages won't normally be mapped. Some kind of solution
3748 * will be needed for this problem of course, but it will have to wait...
3749 */
3750 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3751 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3752 rc = VINF_PGM_GCPHYS_ALIASED;
3753 else
3754 {
3755 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3756 {
3757 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3758 pgmPoolTrackFlushGCPhysPT(pVM,
3759 pPhysPage,
3760 fFlushPTEs,
3761 PGMPOOL_TD_GET_IDX(u16));
3762 }
3763 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3764 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3765 else
3766 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3767 *pfFlushTLBs = true;
3768 }
3769 }
3770
3771 if (rc == VINF_PGM_GCPHYS_ALIASED)
3772 {
3773 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3774 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3775 rc = VINF_PGM_SYNC_CR3;
3776 }
3777 PGM_UNLOCK(pVM);
3778 return rc;
3779}
3780
3781
3782/**
3783 * Scans all shadow page tables for mappings of a physical page.
3784 *
3785 * This may be slow, but it's most likely more efficient than cleaning
3786 * out the entire page pool / cache.
3787 *
3788 * @returns VBox status code.
3789 * @retval VINF_SUCCESS if all references has been successfully cleared.
3790 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3791 * a page pool cleaning.
3792 *
3793 * @param pVM The cross context VM structure.
3794 * @param pPhysPage The guest page in question.
3795 */
3796int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3797{
3798 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3799 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3800 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3801 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3802
3803 /*
3804 * There is a limit to what makes sense.
3805 */
3806 if ( pPool->cPresent > 1024
3807 && pVM->cCpus == 1)
3808 {
3809 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3810 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3811 return VINF_PGM_GCPHYS_ALIASED;
3812 }
3813
3814 /*
3815 * Iterate all the pages until we've encountered all that in use.
3816 * This is simple but not quite optimal solution.
3817 */
3818 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3819 unsigned cLeft = pPool->cUsedPages;
3820 unsigned iPage = pPool->cCurPages;
3821 while (--iPage >= PGMPOOL_IDX_FIRST)
3822 {
3823 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3824 if ( pPage->GCPhys != NIL_RTGCPHYS
3825 && pPage->cPresent)
3826 {
3827 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* see if it hits */
3828 switch (pPage->enmKind)
3829 {
3830 /*
3831 * We only care about shadow page tables.
3832 */
3833 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3834 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3835 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3836 {
3837 const uint32_t u32 = (uint32_t)u64;
3838 unsigned cPresent = pPage->cPresent;
3839 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3840 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3841 {
3842 const X86PGUINT uPte = pPT->a[i].u;
3843 if (uPte & X86_PTE_P)
3844 {
3845 if ((uPte & X86_PTE_PG_MASK) == u32)
3846 {
3847 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3848 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3849
3850 /* Update the counter as we're removing references. */
3851 Assert(pPage->cPresent);
3852 Assert(pPool->cPresent);
3853 pPage->cPresent--;
3854 pPool->cPresent--;
3855 }
3856 if (!--cPresent)
3857 break;
3858 }
3859 }
3860 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3861 break;
3862 }
3863
3864 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3865 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3866 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3867 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3868 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3869 {
3870 unsigned cPresent = pPage->cPresent;
3871 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3872 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3873 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3874 {
3875 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3876 {
3877 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3878 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0); /// @todo why not atomic?
3879
3880 /* Update the counter as we're removing references. */
3881 Assert(pPage->cPresent);
3882 Assert(pPool->cPresent);
3883 pPage->cPresent--;
3884 pPool->cPresent--;
3885 }
3886 if (!--cPresent)
3887 break;
3888 }
3889 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3890 break;
3891 }
3892
3893 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3894 {
3895 unsigned cPresent = pPage->cPresent;
3896 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3897 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3898 {
3899 X86PGPAEUINT const uPte = pPT->a[i].u;
3900 if (uPte & EPT_E_READ)
3901 {
3902 if ((uPte & EPT_PTE_PG_MASK) == u64)
3903 {
3904 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3905 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3906
3907 /* Update the counter as we're removing references. */
3908 Assert(pPage->cPresent);
3909 Assert(pPool->cPresent);
3910 pPage->cPresent--;
3911 pPool->cPresent--;
3912 }
3913 if (!--cPresent)
3914 break;
3915 }
3916 }
3917 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3918 break;
3919 }
3920 }
3921
3922 if (!--cLeft)
3923 break;
3924 }
3925 }
3926
3927 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3928 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3929
3930 /*
3931 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3932 */
3933 if (pPool->cPresent > 1024)
3934 {
3935 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3936 return VINF_PGM_GCPHYS_ALIASED;
3937 }
3938
3939 return VINF_SUCCESS;
3940}
3941
3942
3943/**
3944 * Clears the user entry in a user table.
3945 *
3946 * This is used to remove all references to a page when flushing it.
3947 */
3948static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3949{
3950 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3951 Assert(pUser->iUser < pPool->cCurPages);
3952 uint32_t iUserTable = pUser->iUserTable;
3953
3954 /*
3955 * Map the user page. Ignore references made by fictitious pages.
3956 */
3957 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3958 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3959 union
3960 {
3961 uint64_t *pau64;
3962 uint32_t *pau32;
3963 } u;
3964 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3965 {
3966 Assert(!pUserPage->pvPageR3);
3967 return;
3968 }
3969 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3970
3971
3972 /* Safety precaution in case we change the paging for other modes too in the future. */
3973 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3974
3975#ifdef VBOX_STRICT
3976 /*
3977 * Some sanity checks.
3978 */
3979 switch (pUserPage->enmKind)
3980 {
3981 case PGMPOOLKIND_32BIT_PD:
3982 case PGMPOOLKIND_32BIT_PD_PHYS:
3983 Assert(iUserTable < X86_PG_ENTRIES);
3984 break;
3985 case PGMPOOLKIND_PAE_PDPT:
3986 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3987 case PGMPOOLKIND_PAE_PDPT_PHYS:
3988 Assert(iUserTable < 4);
3989 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3990 break;
3991 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3992 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3993 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3994 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3995 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3996 case PGMPOOLKIND_PAE_PD_PHYS:
3997 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3998 break;
3999 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4000 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4001 break;
4002 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4003 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4004 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
4005 break;
4006 case PGMPOOLKIND_64BIT_PML4:
4007 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
4008 /* GCPhys >> PAGE_SHIFT is the index here */
4009 break;
4010 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4011 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4012 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4013 break;
4014
4015 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4016 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4017 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4018 break;
4019
4020 case PGMPOOLKIND_ROOT_NESTED:
4021 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4022 break;
4023
4024# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4025 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4026 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
4027 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4028 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4029 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4030 Assert(iUserTable < EPT_PG_ENTRIES);
4031 break;
4032# endif
4033
4034 default:
4035 AssertMsgFailed(("enmKind=%d GCPhys=%RGp\n", pUserPage->enmKind, pPage->GCPhys));
4036 break;
4037 }
4038#endif /* VBOX_STRICT */
4039
4040 /*
4041 * Clear the entry in the user page.
4042 */
4043 switch (pUserPage->enmKind)
4044 {
4045 /* 32-bit entries */
4046 case PGMPOOLKIND_32BIT_PD:
4047 case PGMPOOLKIND_32BIT_PD_PHYS:
4048 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
4049 break;
4050
4051 /* 64-bit entries */
4052 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4053 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4054 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4055 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4056 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4057 case PGMPOOLKIND_PAE_PD_PHYS:
4058 case PGMPOOLKIND_PAE_PDPT_PHYS:
4059 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4060 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4061 case PGMPOOLKIND_64BIT_PML4:
4062 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4063 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4064 case PGMPOOLKIND_PAE_PDPT:
4065 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4066 case PGMPOOLKIND_ROOT_NESTED:
4067 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4068 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4069# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4070 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4071 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
4072 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4073 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4074 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4075#endif
4076 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
4077 break;
4078
4079 default:
4080 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
4081 }
4082 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
4083}
4084
4085
4086/**
4087 * Clears all users of a page.
4088 */
4089static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4090{
4091 /*
4092 * Free all the user records.
4093 */
4094 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
4095
4096 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4097 uint16_t i = pPage->iUserHead;
4098 while (i != NIL_PGMPOOL_USER_INDEX)
4099 {
4100 /* Clear enter in user table. */
4101 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
4102
4103 /* Free it. */
4104 const uint16_t iNext = paUsers[i].iNext;
4105 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4106 paUsers[i].iNext = pPool->iUserFreeHead;
4107 pPool->iUserFreeHead = i;
4108
4109 /* Next. */
4110 i = iNext;
4111 }
4112 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4113}
4114
4115
4116/**
4117 * Allocates a new physical cross reference extent.
4118 *
4119 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
4120 * @param pVM The cross context VM structure.
4121 * @param piPhysExt Where to store the phys ext index.
4122 */
4123PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
4124{
4125 PGM_LOCK_ASSERT_OWNER(pVM);
4126 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4127 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4128 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4129 {
4130 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4131 return NULL;
4132 }
4133 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4134 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4135 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4136 *piPhysExt = iPhysExt;
4137 return pPhysExt;
4138}
4139
4140
4141/**
4142 * Frees a physical cross reference extent.
4143 *
4144 * @param pVM The cross context VM structure.
4145 * @param iPhysExt The extent to free.
4146 */
4147void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
4148{
4149 PGM_LOCK_ASSERT_OWNER(pVM);
4150 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4151 Assert(iPhysExt < pPool->cMaxPhysExts);
4152 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4153 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4154 {
4155 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4156 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4157 }
4158 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4159 pPool->iPhysExtFreeHead = iPhysExt;
4160}
4161
4162
4163/**
4164 * Frees a physical cross reference extent.
4165 *
4166 * @param pVM The cross context VM structure.
4167 * @param iPhysExt The extent to free.
4168 */
4169void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
4170{
4171 PGM_LOCK_ASSERT_OWNER(pVM);
4172 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4173
4174 const uint16_t iPhysExtStart = iPhysExt;
4175 PPGMPOOLPHYSEXT pPhysExt;
4176 do
4177 {
4178 Assert(iPhysExt < pPool->cMaxPhysExts);
4179 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4180 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4181 {
4182 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4183 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4184 }
4185
4186 /* next */
4187 iPhysExt = pPhysExt->iNext;
4188 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4189
4190 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4191 pPool->iPhysExtFreeHead = iPhysExtStart;
4192}
4193
4194
4195/**
4196 * Insert a reference into a list of physical cross reference extents.
4197 *
4198 * @returns The new tracking data for PGMPAGE.
4199 *
4200 * @param pVM The cross context VM structure.
4201 * @param iPhysExt The physical extent index of the list head.
4202 * @param iShwPT The shadow page table index.
4203 * @param iPte Page table entry
4204 *
4205 */
4206static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4207{
4208 PGM_LOCK_ASSERT_OWNER(pVM);
4209 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4210 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4211
4212 /*
4213 * Special common cases.
4214 */
4215 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4216 {
4217 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4218 paPhysExts[iPhysExt].apte[1] = iPte;
4219 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4220 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4221 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4222 }
4223 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4224 {
4225 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4226 paPhysExts[iPhysExt].apte[2] = iPte;
4227 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4228 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4229 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4230 }
4231 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4232
4233 /*
4234 * General treatment.
4235 */
4236 const uint16_t iPhysExtStart = iPhysExt;
4237 unsigned cMax = 15;
4238 for (;;)
4239 {
4240 Assert(iPhysExt < pPool->cMaxPhysExts);
4241 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4242 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4243 {
4244 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4245 paPhysExts[iPhysExt].apte[i] = iPte;
4246 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4247 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4248 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4249 }
4250 if (!--cMax)
4251 {
4252 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
4253 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4254 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4255 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4256 }
4257
4258 /* advance */
4259 iPhysExt = paPhysExts[iPhysExt].iNext;
4260 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4261 break;
4262 }
4263
4264 /*
4265 * Add another extent to the list.
4266 */
4267 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4268 if (!pNew)
4269 {
4270 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4271 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4272 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4273 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4274 }
4275 pNew->iNext = iPhysExtStart;
4276 pNew->aidx[0] = iShwPT;
4277 pNew->apte[0] = iPte;
4278 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4279 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4280}
4281
4282
4283/**
4284 * Add a reference to guest physical page where extents are in use.
4285 *
4286 * @returns The new tracking data for PGMPAGE.
4287 *
4288 * @param pVM The cross context VM structure.
4289 * @param pPhysPage Pointer to the aPages entry in the ram range.
4290 * @param u16 The ram range flags (top 16-bits).
4291 * @param iShwPT The shadow page table index.
4292 * @param iPte Page table entry
4293 */
4294uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4295{
4296 PGM_LOCK_VOID(pVM);
4297 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4298 {
4299 /*
4300 * Convert to extent list.
4301 */
4302 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4303 uint16_t iPhysExt;
4304 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4305 if (pPhysExt)
4306 {
4307 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4308 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4309 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4310 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4311 pPhysExt->aidx[1] = iShwPT;
4312 pPhysExt->apte[1] = iPte;
4313 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4314 }
4315 else
4316 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4317 }
4318 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4319 {
4320 /*
4321 * Insert into the extent list.
4322 */
4323 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4324 }
4325 else
4326 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4327 PGM_UNLOCK(pVM);
4328 return u16;
4329}
4330
4331
4332/**
4333 * Clear references to guest physical memory.
4334 *
4335 * @param pPool The pool.
4336 * @param pPage The page.
4337 * @param pPhysPage Pointer to the aPages entry in the ram range.
4338 * @param iPte Shadow PTE index
4339 */
4340void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4341{
4342 PVMCC pVM = pPool->CTX_SUFF(pVM);
4343 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4344 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4345
4346 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4347 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4348 {
4349 PGM_LOCK_VOID(pVM);
4350
4351 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4352 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4353 do
4354 {
4355 Assert(iPhysExt < pPool->cMaxPhysExts);
4356
4357 /*
4358 * Look for the shadow page and check if it's all freed.
4359 */
4360 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4361 {
4362 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4363 && paPhysExts[iPhysExt].apte[i] == iPte)
4364 {
4365 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4366 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4367
4368 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4369 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4370 {
4371 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4372 PGM_UNLOCK(pVM);
4373 return;
4374 }
4375
4376 /* we can free the node. */
4377 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4378 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4379 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4380 {
4381 /* lonely node */
4382 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4383 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4384 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4385 }
4386 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4387 {
4388 /* head */
4389 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4390 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4391 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4392 }
4393 else
4394 {
4395 /* in list */
4396 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4397 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4398 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4399 }
4400 iPhysExt = iPhysExtNext;
4401 PGM_UNLOCK(pVM);
4402 return;
4403 }
4404 }
4405
4406 /* next */
4407 iPhysExtPrev = iPhysExt;
4408 iPhysExt = paPhysExts[iPhysExt].iNext;
4409 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4410
4411 PGM_UNLOCK(pVM);
4412 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4413 }
4414 else /* nothing to do */
4415 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4416}
4417
4418/**
4419 * Clear references to guest physical memory.
4420 *
4421 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4422 * physical address is assumed to be correct, so the linear search can be
4423 * skipped and we can assert at an earlier point.
4424 *
4425 * @param pPool The pool.
4426 * @param pPage The page.
4427 * @param HCPhys The host physical address corresponding to the guest page.
4428 * @param GCPhys The guest physical address corresponding to HCPhys.
4429 * @param iPte Shadow PTE index
4430 */
4431static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4432{
4433 /*
4434 * Lookup the page and check if it checks out before derefing it.
4435 */
4436 PVMCC pVM = pPool->CTX_SUFF(pVM);
4437 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4438 if (pPhysPage)
4439 {
4440 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4441#ifdef LOG_ENABLED
4442 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4443 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4444#endif
4445 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4446 {
4447 Assert(pPage->cPresent);
4448 Assert(pPool->cPresent);
4449 pPage->cPresent--;
4450 pPool->cPresent--;
4451 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4452 return;
4453 }
4454
4455 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp iPte=%u fIsNested=%RTbool\n",
4456 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage), iPte, PGMPOOL_PAGE_IS_NESTED(pPage)));
4457 }
4458 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4459}
4460
4461
4462/**
4463 * Clear references to guest physical memory.
4464 *
4465 * @param pPool The pool.
4466 * @param pPage The page.
4467 * @param HCPhys The host physical address corresponding to the guest page.
4468 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
4469 * @param iPte Shadow pte index
4470 */
4471void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4472{
4473 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4474
4475 /*
4476 * Try the hint first.
4477 */
4478 RTHCPHYS HCPhysHinted;
4479 PVMCC pVM = pPool->CTX_SUFF(pVM);
4480 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4481 if (pPhysPage)
4482 {
4483 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4484 Assert(HCPhysHinted);
4485 if (HCPhysHinted == HCPhys)
4486 {
4487 Assert(pPage->cPresent);
4488 Assert(pPool->cPresent);
4489 pPage->cPresent--;
4490 pPool->cPresent--;
4491 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4492 return;
4493 }
4494 }
4495 else
4496 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4497
4498 /*
4499 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4500 */
4501 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4502 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4503 while (pRam)
4504 {
4505 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4506 while (iPage-- > 0)
4507 {
4508 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4509 {
4510 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4511 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4512 Assert(pPage->cPresent);
4513 Assert(pPool->cPresent);
4514 pPage->cPresent--;
4515 pPool->cPresent--;
4516 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4517 return;
4518 }
4519 }
4520 pRam = pRam->CTX_SUFF(pNext);
4521 }
4522
4523 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4524}
4525
4526
4527/**
4528 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4529 *
4530 * @param pPool The pool.
4531 * @param pPage The page.
4532 * @param pShwPT The shadow page table (mapping of the page).
4533 * @param pGstPT The guest page table.
4534 */
4535DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4536{
4537 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4538 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4539 {
4540 const X86PGUINT uPte = pShwPT->a[i].u;
4541 Assert(!(uPte & RT_BIT_32(10)));
4542 if (uPte & X86_PTE_P)
4543 {
4544 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4545 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4546 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4547 if (!pPage->cPresent)
4548 break;
4549 }
4550 }
4551}
4552
4553
4554/**
4555 * Clear references to guest physical memory in a PAE / 32-bit page table.
4556 *
4557 * @param pPool The pool.
4558 * @param pPage The page.
4559 * @param pShwPT The shadow page table (mapping of the page).
4560 * @param pGstPT The guest page table (just a half one).
4561 */
4562DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4563{
4564 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4565 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4566 {
4567 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4568 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4569 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4570 {
4571 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4572 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4573 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4574 if (!pPage->cPresent)
4575 break;
4576 }
4577 }
4578}
4579
4580
4581/**
4582 * Clear references to guest physical memory in a PAE / PAE page table.
4583 *
4584 * @param pPool The pool.
4585 * @param pPage The page.
4586 * @param pShwPT The shadow page table (mapping of the page).
4587 * @param pGstPT The guest page table.
4588 */
4589DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4590{
4591 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4592 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4593 {
4594 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4595 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4596 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4597 {
4598 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4599 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4600 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4601 if (!pPage->cPresent)
4602 break;
4603 }
4604 }
4605}
4606
4607
4608/**
4609 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4610 *
4611 * @param pPool The pool.
4612 * @param pPage The page.
4613 * @param pShwPT The shadow page table (mapping of the page).
4614 */
4615DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4616{
4617 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4618 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4619 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4620 {
4621 const X86PGUINT uPte = pShwPT->a[i].u;
4622 Assert(!(uPte & RT_BIT_32(10)));
4623 if (uPte & X86_PTE_P)
4624 {
4625 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4626 i, uPte & X86_PTE_PG_MASK, GCPhys));
4627 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4628 if (!pPage->cPresent)
4629 break;
4630 }
4631 }
4632}
4633
4634
4635/**
4636 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4637 *
4638 * @param pPool The pool.
4639 * @param pPage The page.
4640 * @param pShwPT The shadow page table (mapping of the page).
4641 */
4642DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4643{
4644 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4645 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4646 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4647 {
4648 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4649 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4650 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4651 {
4652 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4653 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4654 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4655 if (!pPage->cPresent)
4656 break;
4657 }
4658 }
4659}
4660
4661
4662/**
4663 * Clear references to shadowed pages in an EPT page table.
4664 *
4665 * @param pPool The pool.
4666 * @param pPage The page.
4667 * @param pShwPT The shadow page directory pointer table (mapping of the
4668 * page).
4669 */
4670DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4671{
4672 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4673 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4674 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4675 {
4676 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4677 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4678 if (uPte & EPT_E_READ)
4679 {
4680 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4681 i, uPte & EPT_PTE_PG_MASK, pPage->GCPhys));
4682 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4683 if (!pPage->cPresent)
4684 break;
4685 }
4686 }
4687}
4688
4689#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4690
4691/**
4692 * Clears references to shadowed pages in a SLAT EPT page table.
4693 *
4694 * @param pPool The pool.
4695 * @param pPage The page.
4696 * @param pShwPT The shadow page table (mapping of the page).
4697 * @param pGstPT The guest page table.
4698 */
4699DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT, PCEPTPT pGstPT)
4700{
4701 Assert(PGMPOOL_PAGE_IS_NESTED(pPage));
4702 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4703 {
4704 X86PGPAEUINT const uShwPte = pShwPT->a[i].u;
4705 Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
4706 if (uShwPte & EPT_PRESENT_MASK)
4707 {
4708 Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, pGstPT->a[i].u));
4709 pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, pGstPT->a[i].u & EPT_PTE_PG_MASK, i);
4710 if (!pPage->cPresent)
4711 break;
4712 }
4713 }
4714}
4715
4716
4717/**
4718 * Clear references to guest physical memory in a SLAT 2MB EPT page table.
4719 *
4720 * @param pPool The pool.
4721 * @param pPage The page.
4722 * @param pShwPT The shadow page table (mapping of the page).
4723 */
4724DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT2MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4725{
4726 Assert(pPage->fA20Enabled);
4727 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4728 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4729 {
4730 X86PGPAEUINT const uShwPte = pShwPT->a[i].u;
4731 Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
4732 if (uShwPte & EPT_PRESENT_MASK)
4733 {
4734 Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, GCPhys));
4735 pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, GCPhys, i);
4736 if (!pPage->cPresent)
4737 break;
4738 }
4739 }
4740}
4741
4742
4743/**
4744 * Clear references to shadowed pages in a SLAT EPT page directory.
4745 *
4746 * @param pPool The pool.
4747 * @param pPage The page.
4748 * @param pShwPD The shadow page directory (mapping of the page).
4749 * @param pGstPD The guest page directory.
4750 */
4751DECLINLINE(void) pgmPoolTrackDerefNestedPDEpt(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD, PCEPTPD pGstPD)
4752{
4753 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4754 {
4755 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4756#ifdef PGM_WITH_LARGE_PAGES
4757 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4758#else
4759 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4760#endif
4761 if (uPde & EPT_PRESENT_MASK)
4762 {
4763#ifdef PGM_WITH_LARGE_PAGES
4764 if (uPde & EPT_E_LEAF)
4765 {
4766 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n", i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4767 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK, pGstPD->a[i].u & EPT_PDE2M_PG_MASK, i);
4768 }
4769 else
4770#endif
4771 {
4772 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4773 if (pSubPage)
4774 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4775 else
4776 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4777 }
4778 }
4779 }
4780}
4781
4782#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
4783
4784
4785/**
4786 * Clear references to shadowed pages in a 32 bits page directory.
4787 *
4788 * @param pPool The pool.
4789 * @param pPage The page.
4790 * @param pShwPD The shadow page directory (mapping of the page).
4791 */
4792DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4793{
4794 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4795 {
4796 X86PGUINT const uPde = pShwPD->a[i].u;
4797 if (uPde & X86_PDE_P)
4798 {
4799 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4800 if (pSubPage)
4801 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4802 else
4803 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4804 }
4805 }
4806}
4807
4808
4809/**
4810 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4811 *
4812 * @param pPool The pool.
4813 * @param pPage The page.
4814 * @param pShwPD The shadow page directory (mapping of the page).
4815 */
4816DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4817{
4818 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4819 {
4820 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4821 if (uPde & X86_PDE_P)
4822 {
4823#ifdef PGM_WITH_LARGE_PAGES
4824 if (uPde & X86_PDE_PS)
4825 {
4826 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4827 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4828 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4829 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4830 i);
4831 }
4832 else
4833#endif
4834 {
4835 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4836 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4837 if (pSubPage)
4838 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4839 else
4840 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4841 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4842 }
4843 }
4844 }
4845}
4846
4847
4848/**
4849 * Clear references to shadowed pages in a PAE page directory pointer table.
4850 *
4851 * @param pPool The pool.
4852 * @param pPage The page.
4853 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4854 */
4855DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4856{
4857 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4858 {
4859 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4860 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4861 if (uPdpe & X86_PDPE_P)
4862 {
4863 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4864 if (pSubPage)
4865 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4866 else
4867 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4868 }
4869 }
4870}
4871
4872
4873/**
4874 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4875 *
4876 * @param pPool The pool.
4877 * @param pPage The page.
4878 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4879 */
4880DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4881{
4882 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4883 {
4884 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4885 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4886 if (uPdpe & X86_PDPE_P)
4887 {
4888 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4889 if (pSubPage)
4890 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4891 else
4892 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4893 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4894 }
4895 }
4896}
4897
4898
4899/**
4900 * Clear references to shadowed pages in a 64-bit level 4 page table.
4901 *
4902 * @param pPool The pool.
4903 * @param pPage The page.
4904 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4905 */
4906DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4907{
4908 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4909 {
4910 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4911 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4912 if (uPml4e & X86_PML4E_P)
4913 {
4914 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4915 if (pSubPage)
4916 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4917 else
4918 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4919 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4920 }
4921 }
4922}
4923
4924
4925/**
4926 * Clear references to shadowed pages in an EPT page directory.
4927 *
4928 * @param pPool The pool.
4929 * @param pPage The page.
4930 * @param pShwPD The shadow page directory (mapping of the page).
4931 */
4932DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4933{
4934 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4935 {
4936 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4937#ifdef PGM_WITH_LARGE_PAGES
4938 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4939#else
4940 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4941#endif
4942 if (uPde & EPT_E_READ)
4943 {
4944#ifdef PGM_WITH_LARGE_PAGES
4945 if (uPde & EPT_E_LEAF)
4946 {
4947 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4948 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4949 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4950 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4951 i);
4952 }
4953 else
4954#endif
4955 {
4956 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4957 if (pSubPage)
4958 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4959 else
4960 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4961 }
4962 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4963 }
4964 }
4965}
4966
4967
4968/**
4969 * Clear references to shadowed pages in an EPT page directory pointer table.
4970 *
4971 * @param pPool The pool.
4972 * @param pPage The page.
4973 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4974 */
4975DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4976{
4977 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4978 {
4979 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4980 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
4981 if (uPdpe & EPT_E_READ)
4982 {
4983 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
4984 if (pSubPage)
4985 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4986 else
4987 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
4988 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4989 }
4990 }
4991}
4992
4993
4994/**
4995 * Clears all references made by this page.
4996 *
4997 * This includes other shadow pages and GC physical addresses.
4998 *
4999 * @param pPool The pool.
5000 * @param pPage The page.
5001 */
5002static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
5003{
5004 /*
5005 * Map the shadow page and take action according to the page kind.
5006 */
5007 PVMCC pVM = pPool->CTX_SUFF(pVM);
5008 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5009 switch (pPage->enmKind)
5010 {
5011 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5012 {
5013 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5014 void *pvGst;
5015 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5016 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
5017 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5018 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5019 break;
5020 }
5021
5022 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5023 {
5024 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5025 void *pvGst;
5026 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5027 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
5028 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5029 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5030 break;
5031 }
5032
5033 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5034 {
5035 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5036 void *pvGst;
5037 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5038 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
5039 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5040 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5041 break;
5042 }
5043
5044 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
5045 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5046 {
5047 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5048 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
5049 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5050 break;
5051 }
5052
5053 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
5054 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5055 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5056 {
5057 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5058 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
5059 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5060 break;
5061 }
5062
5063 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5064 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5065 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5066 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5067 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5068 case PGMPOOLKIND_PAE_PD_PHYS:
5069 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5070 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5071 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
5072 break;
5073
5074 case PGMPOOLKIND_32BIT_PD_PHYS:
5075 case PGMPOOLKIND_32BIT_PD:
5076 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
5077 break;
5078
5079 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5080 case PGMPOOLKIND_PAE_PDPT:
5081 case PGMPOOLKIND_PAE_PDPT_PHYS:
5082 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
5083 break;
5084
5085 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5086 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5087 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
5088 break;
5089
5090 case PGMPOOLKIND_64BIT_PML4:
5091 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
5092 break;
5093
5094 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5095 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
5096 break;
5097
5098 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5099 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
5100 break;
5101
5102 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5103 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5104 break;
5105
5106#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
5107 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5108 {
5109 void *pvGst;
5110 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5111 pgmPoolTrackDerefNestedPTEPT(pPool, pPage, (PEPTPT)pvShw, (PCEPTPT)pvGst);
5112 break;
5113 }
5114
5115 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
5116 pgmPoolTrackDerefNestedPTEPT2MB(pPool, pPage, (PEPTPT)pvShw);
5117 break;
5118
5119 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5120 {
5121 void *pvGst;
5122 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5123 pgmPoolTrackDerefNestedPDEpt(pPool, pPage, (PEPTPD)pvShw, (PCEPTPD)pvGst);
5124 break;
5125 }
5126
5127 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5128 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5129 break;
5130#endif
5131
5132 default:
5133 AssertFatalMsgFailed(("enmKind=%d GCPhys=%RGp\n", pPage->enmKind, pPage->GCPhys));
5134 }
5135
5136 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
5137 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5138 ASMMemZeroPage(pvShw);
5139 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5140 pPage->fZeroed = true;
5141 Assert(!pPage->cPresent);
5142 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
5143}
5144
5145
5146/**
5147 * Flushes a pool page.
5148 *
5149 * This moves the page to the free list after removing all user references to it.
5150 *
5151 * @returns VBox status code.
5152 * @retval VINF_SUCCESS on success.
5153 * @param pPool The pool.
5154 * @param pPage The shadow page.
5155 * @param fFlush Flush the TLBS when required (should only be false in very specific use cases!!)
5156 */
5157int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
5158{
5159 PVMCC pVM = pPool->CTX_SUFF(pVM);
5160 bool fFlushRequired = false;
5161
5162 int rc = VINF_SUCCESS;
5163 STAM_PROFILE_START(&pPool->StatFlushPage, f);
5164 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5165 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5166
5167 if (PGMPOOL_PAGE_IS_NESTED(pPage))
5168 Log7Func(("pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5169 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5170
5171 /*
5172 * Reject any attempts at flushing any of the special root pages (shall
5173 * not happen).
5174 */
5175 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
5176 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
5177 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
5178 VINF_SUCCESS);
5179
5180 PGM_LOCK_VOID(pVM);
5181
5182 /*
5183 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
5184 */
5185 if (pgmPoolIsPageLocked(pPage))
5186 {
5187#if !defined(VBOX_VMM_TARGET_ARMV8)
5188 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
5189 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
5190 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
5191 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
5192 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5193 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
5194 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
5195 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
5196 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
5197 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
5198 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
5199#endif
5200 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
5201 PGM_UNLOCK(pVM);
5202 return VINF_SUCCESS;
5203 }
5204
5205 /*
5206 * Mark the page as being in need of an ASMMemZeroPage().
5207 */
5208 pPage->fZeroed = false;
5209
5210#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5211 if (pPage->fDirty)
5212 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
5213#endif
5214
5215 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
5216 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
5217 fFlushRequired = true;
5218
5219 /*
5220 * Clear the page.
5221 */
5222 pgmPoolTrackClearPageUsers(pPool, pPage);
5223 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
5224 pgmPoolTrackDeref(pPool, pPage);
5225 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
5226
5227 /*
5228 * Flush it from the cache.
5229 */
5230 pgmPoolCacheFlushPage(pPool, pPage);
5231
5232 /*
5233 * Deregistering the monitoring.
5234 */
5235 if (pPage->fMonitored)
5236 rc = pgmPoolMonitorFlush(pPool, pPage);
5237
5238 /*
5239 * Free the page.
5240 */
5241 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
5242 pPage->iNext = pPool->iFreeHead;
5243 pPool->iFreeHead = pPage->idx;
5244 pPage->enmKind = PGMPOOLKIND_FREE;
5245 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5246 pPage->GCPhys = NIL_RTGCPHYS;
5247 pPage->fReusedFlushPending = false;
5248
5249 pPool->cUsedPages--;
5250
5251 /* Flush the TLBs of all VCPUs if required. */
5252 if ( fFlushRequired
5253 && fFlush)
5254 {
5255 PGM_INVL_ALL_VCPU_TLBS(pVM);
5256 }
5257
5258 PGM_UNLOCK(pVM);
5259 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5260 return rc;
5261}
5262
5263
5264/**
5265 * Frees a usage of a pool page.
5266 *
5267 * The caller is responsible to updating the user table so that it no longer
5268 * references the shadow page.
5269 *
5270 * @param pPool The pool.
5271 * @param pPage The shadow page.
5272 * @param iUser The shadow page pool index of the user table.
5273 * NIL_PGMPOOL_IDX for root pages.
5274 * @param iUserTable The index into the user table (shadowed). Ignored if
5275 * root page.
5276 */
5277void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5278{
5279 PVMCC pVM = pPool->CTX_SUFF(pVM);
5280
5281 STAM_PROFILE_START(&pPool->StatFree, a);
5282 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5283 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5284 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5285
5286 PGM_LOCK_VOID(pVM);
5287 if (iUser != NIL_PGMPOOL_IDX)
5288 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5289 if (!pPage->fCached)
5290 pgmPoolFlushPage(pPool, pPage);
5291 PGM_UNLOCK(pVM);
5292 STAM_PROFILE_STOP(&pPool->StatFree, a);
5293}
5294
5295
5296/**
5297 * Makes one or more free page free.
5298 *
5299 * @returns VBox status code.
5300 * @retval VINF_SUCCESS on success.
5301 *
5302 * @param pPool The pool.
5303 * @param enmKind Page table kind
5304 * @param iUser The user of the page.
5305 */
5306static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5307{
5308 PVMCC pVM = pPool->CTX_SUFF(pVM);
5309 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5310 NOREF(enmKind);
5311
5312 /*
5313 * If the pool isn't full grown yet, expand it.
5314 */
5315 if (pPool->cCurPages < pPool->cMaxPages)
5316 {
5317 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5318#ifdef IN_RING3
5319 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
5320#else
5321 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
5322#endif
5323 if (RT_FAILURE(rc))
5324 return rc;
5325 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5326 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5327 return VINF_SUCCESS;
5328 }
5329
5330 /*
5331 * Free one cached page.
5332 */
5333 return pgmPoolCacheFreeOne(pPool, iUser);
5334}
5335
5336
5337/**
5338 * Allocates a page from the pool.
5339 *
5340 * This page may actually be a cached page and not in need of any processing
5341 * on the callers part.
5342 *
5343 * @returns VBox status code.
5344 * @retval VINF_SUCCESS if a NEW page was allocated.
5345 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5346 *
5347 * @param pVM The cross context VM structure.
5348 * @param GCPhys The GC physical address of the page we're gonna shadow.
5349 * For 4MB and 2MB PD entries, it's the first address the
5350 * shadow PT is covering.
5351 * @param enmKind The kind of mapping.
5352 * @param enmAccess Access type for the mapping (only relevant for big pages)
5353 * @param fA20Enabled Whether the A20 gate is enabled or not.
5354 * @param iUser The shadow page pool index of the user table. Root
5355 * pages should pass NIL_PGMPOOL_IDX.
5356 * @param iUserTable The index into the user table (shadowed). Ignored for
5357 * root pages (iUser == NIL_PGMPOOL_IDX).
5358 * @param fLockPage Lock the page
5359 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5360 */
5361int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5362 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5363{
5364 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5365 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5366 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5367 *ppPage = NULL;
5368 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5369 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5370 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5371
5372#if defined(VBOX_STRICT) && defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT)
5373 PVMCPUCC pVCpu = VMMGetCpu(pVM);
5374 Assert(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT || PGMPOOL_PAGE_IS_KIND_NESTED(enmKind));
5375#endif
5376
5377 PGM_LOCK_VOID(pVM);
5378
5379 if (pPool->fCacheEnabled)
5380 {
5381 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5382 if (RT_SUCCESS(rc2))
5383 {
5384 if (fLockPage)
5385 pgmPoolLockPage(pPool, *ppPage);
5386 PGM_UNLOCK(pVM);
5387 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5388 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5389 return rc2;
5390 }
5391 }
5392
5393 /*
5394 * Allocate a new one.
5395 */
5396 int rc = VINF_SUCCESS;
5397 uint16_t iNew = pPool->iFreeHead;
5398 if (iNew == NIL_PGMPOOL_IDX)
5399 {
5400 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5401 if (RT_FAILURE(rc))
5402 {
5403 PGM_UNLOCK(pVM);
5404 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5405 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5406 return rc;
5407 }
5408 iNew = pPool->iFreeHead;
5409 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5410 }
5411
5412 /* unlink the free head */
5413 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5414 pPool->iFreeHead = pPage->iNext;
5415 pPage->iNext = NIL_PGMPOOL_IDX;
5416
5417 /*
5418 * Initialize it.
5419 */
5420 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5421 pPage->enmKind = enmKind;
5422 pPage->enmAccess = enmAccess;
5423 pPage->GCPhys = GCPhys;
5424 pPage->fA20Enabled = fA20Enabled;
5425 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5426 pPage->fMonitored = false;
5427 pPage->fCached = false;
5428 pPage->fDirty = false;
5429 pPage->fReusedFlushPending = false;
5430 pPage->cModifications = 0;
5431 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5432 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5433 pPage->cPresent = 0;
5434 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5435 pPage->idxDirtyEntry = 0;
5436 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5437 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5438 pPage->cLastAccessHandler = 0;
5439 pPage->cLocked = 0;
5440# ifdef VBOX_STRICT
5441 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5442# endif
5443
5444 /*
5445 * Insert into the tracking and cache. If this fails, free the page.
5446 */
5447 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5448 if (RT_FAILURE(rc3))
5449 {
5450 pPool->cUsedPages--;
5451 pPage->enmKind = PGMPOOLKIND_FREE;
5452 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5453 pPage->GCPhys = NIL_RTGCPHYS;
5454 pPage->iNext = pPool->iFreeHead;
5455 pPool->iFreeHead = pPage->idx;
5456 PGM_UNLOCK(pVM);
5457 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5458 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5459 return rc3;
5460 }
5461
5462 /*
5463 * Commit the allocation, clear the page and return.
5464 */
5465#ifdef VBOX_WITH_STATISTICS
5466 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5467 pPool->cUsedPagesHigh = pPool->cUsedPages;
5468#endif
5469
5470 if (!pPage->fZeroed)
5471 {
5472 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5473 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5474 ASMMemZeroPage(pv);
5475 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5476 }
5477
5478 *ppPage = pPage;
5479 if (fLockPage)
5480 pgmPoolLockPage(pPool, pPage);
5481 PGM_UNLOCK(pVM);
5482 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5483 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5484 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5485 return rc;
5486}
5487
5488
5489/**
5490 * Frees a usage of a pool page.
5491 *
5492 * @param pVM The cross context VM structure.
5493 * @param HCPhys The HC physical address of the shadow page.
5494 * @param iUser The shadow page pool index of the user table.
5495 * NIL_PGMPOOL_IDX if root page.
5496 * @param iUserTable The index into the user table (shadowed). Ignored if
5497 * root page.
5498 */
5499void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5500{
5501 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5502 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5503 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5504}
5505
5506
5507/**
5508 * Internal worker for finding a 'in-use' shadow page give by it's physical address.
5509 *
5510 * @returns Pointer to the shadow page structure.
5511 * @param pPool The pool.
5512 * @param HCPhys The HC physical address of the shadow page.
5513 */
5514PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5515{
5516 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5517
5518 /*
5519 * Look up the page.
5520 */
5521 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5522
5523 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5524 return pPage;
5525}
5526
5527
5528/**
5529 * Internal worker for finding a page for debugging purposes, no assertions.
5530 *
5531 * @returns Pointer to the shadow page structure. NULL on if not found.
5532 * @param pPool The pool.
5533 * @param HCPhys The HC physical address of the shadow page.
5534 */
5535PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5536{
5537 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5538 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5539}
5540
5541
5542/**
5543 * Internal worker for PGM_HCPHYS_2_PTR.
5544 *
5545 * @returns VBox status code.
5546 * @param pVM The cross context VM structure.
5547 * @param HCPhys The HC physical address of the shadow page.
5548 * @param ppv Where to return the address.
5549 */
5550int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5551{
5552 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5553 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5554 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5555 VERR_PGM_POOL_GET_PAGE_FAILED);
5556 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5557 return VINF_SUCCESS;
5558}
5559
5560#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5561
5562/**
5563 * Flush the specified page if present
5564 *
5565 * @param pVM The cross context VM structure.
5566 * @param GCPhys Guest physical address of the page to flush
5567 */
5568void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5569{
5570 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5571
5572 VM_ASSERT_EMT(pVM);
5573
5574 /*
5575 * Look up the GCPhys in the hash.
5576 */
5577 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5578 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5579 if (i == NIL_PGMPOOL_IDX)
5580 return;
5581
5582 do
5583 {
5584 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5585 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5586 {
5587 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* Temporary to see if it hits. Remove later. */
5588 switch (pPage->enmKind)
5589 {
5590 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5591 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5592 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5593 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5594 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5595 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5596 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5597 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5598 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5599 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5600 case PGMPOOLKIND_64BIT_PML4:
5601 case PGMPOOLKIND_32BIT_PD:
5602 case PGMPOOLKIND_PAE_PDPT:
5603 {
5604 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5605# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5606 if (pPage->fDirty)
5607 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5608 else
5609# endif
5610 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5611 Assert(!pgmPoolIsPageLocked(pPage));
5612 pgmPoolMonitorChainFlush(pPool, pPage);
5613 return;
5614 }
5615
5616 /* ignore, no monitoring. */
5617 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5618 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5619 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5620 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5621 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5622 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5623 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5624 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5625 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5626 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5627 case PGMPOOLKIND_ROOT_NESTED:
5628 case PGMPOOLKIND_PAE_PD_PHYS:
5629 case PGMPOOLKIND_PAE_PDPT_PHYS:
5630 case PGMPOOLKIND_32BIT_PD_PHYS:
5631 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5632 break;
5633
5634 default:
5635 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5636 }
5637 }
5638
5639 /* next */
5640 i = pPage->iNext;
5641 } while (i != NIL_PGMPOOL_IDX);
5642 return;
5643}
5644
5645
5646/**
5647 * Reset CPU on hot plugging.
5648 *
5649 * @param pVM The cross context VM structure.
5650 * @param pVCpu The cross context virtual CPU structure.
5651 */
5652void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5653{
5654 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5655
5656 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5657 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5658 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5659}
5660
5661
5662/**
5663 * Flushes the entire cache.
5664 *
5665 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5666 * this and execute this CR3 flush.
5667 *
5668 * @param pVM The cross context VM structure.
5669 */
5670void pgmR3PoolReset(PVM pVM)
5671{
5672 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5673
5674 PGM_LOCK_ASSERT_OWNER(pVM);
5675 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5676 LogFlow(("pgmR3PoolReset:\n"));
5677
5678 /*
5679 * If there are no pages in the pool, there is nothing to do.
5680 */
5681 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5682 {
5683 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5684 return;
5685 }
5686
5687 /*
5688 * Exit the shadow mode since we're going to clear everything,
5689 * including the root page.
5690 */
5691 VMCC_FOR_EACH_VMCPU(pVM)
5692 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5693 VMCC_FOR_EACH_VMCPU_END(pVM);
5694
5695
5696 /*
5697 * Nuke the free list and reinsert all pages into it.
5698 */
5699 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5700 {
5701 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5702
5703 if (pPage->fMonitored)
5704 pgmPoolMonitorFlush(pPool, pPage);
5705 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5706 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5707 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5708 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5709 pPage->GCPhys = NIL_RTGCPHYS;
5710 pPage->enmKind = PGMPOOLKIND_FREE;
5711 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5712 Assert(pPage->idx == i);
5713 pPage->iNext = i + 1;
5714 pPage->fA20Enabled = true;
5715 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5716 pPage->fSeenNonGlobal = false;
5717 pPage->fMonitored = false;
5718 pPage->fDirty = false;
5719 pPage->fCached = false;
5720 pPage->fReusedFlushPending = false;
5721 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5722 pPage->cPresent = 0;
5723 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5724 pPage->cModifications = 0;
5725 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5726 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5727 pPage->idxDirtyEntry = 0;
5728 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5729 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5730 pPage->cLastAccessHandler = 0;
5731 pPage->cLocked = 0;
5732# ifdef VBOX_STRICT
5733 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5734# endif
5735 }
5736 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5737 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5738 pPool->cUsedPages = 0;
5739
5740 /*
5741 * Zap and reinitialize the user records.
5742 */
5743 pPool->cPresent = 0;
5744 pPool->iUserFreeHead = 0;
5745 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5746 const unsigned cMaxUsers = pPool->cMaxUsers;
5747 for (unsigned i = 0; i < cMaxUsers; i++)
5748 {
5749 paUsers[i].iNext = i + 1;
5750 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5751 paUsers[i].iUserTable = 0xfffffffe;
5752 }
5753 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5754
5755 /*
5756 * Clear all the GCPhys links and rebuild the phys ext free list.
5757 */
5758 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5759 pRam;
5760 pRam = pRam->CTX_SUFF(pNext))
5761 {
5762 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5763 while (iPage-- > 0)
5764 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5765 }
5766
5767 pPool->iPhysExtFreeHead = 0;
5768 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5769 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5770 for (unsigned i = 0; i < cMaxPhysExts; i++)
5771 {
5772 paPhysExts[i].iNext = i + 1;
5773 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5774 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5775 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5776 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5777 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5778 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5779 }
5780 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5781
5782 /*
5783 * Just zap the modified list.
5784 */
5785 pPool->cModifiedPages = 0;
5786 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5787
5788 /*
5789 * Clear the GCPhys hash and the age list.
5790 */
5791 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5792 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5793 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5794 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5795
5796# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5797 /* Clear all dirty pages. */
5798 pPool->idxFreeDirtyPage = 0;
5799 pPool->cDirtyPages = 0;
5800 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5801 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5802# endif
5803
5804 /*
5805 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5806 */
5807 VMCC_FOR_EACH_VMCPU(pVM)
5808 {
5809 /*
5810 * Re-enter the shadowing mode and assert Sync CR3 FF.
5811 */
5812 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5813 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5814 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5815 }
5816 VMCC_FOR_EACH_VMCPU_END(pVM);
5817
5818 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5819}
5820
5821#endif /* IN_RING3 */
5822
5823#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5824/**
5825 * Stringifies a PGMPOOLKIND value.
5826 */
5827static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5828{
5829 switch ((PGMPOOLKIND)enmKind)
5830 {
5831 case PGMPOOLKIND_INVALID:
5832 return "PGMPOOLKIND_INVALID";
5833 case PGMPOOLKIND_FREE:
5834 return "PGMPOOLKIND_FREE";
5835 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5836 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5837 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5838 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5839 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5840 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5841 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5842 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5843 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5844 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5845 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5846 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5847 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5848 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5849 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5850 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5851 case PGMPOOLKIND_32BIT_PD:
5852 return "PGMPOOLKIND_32BIT_PD";
5853 case PGMPOOLKIND_32BIT_PD_PHYS:
5854 return "PGMPOOLKIND_32BIT_PD_PHYS";
5855 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5856 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5857 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5858 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5859 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5860 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5861 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5862 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5863 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5864 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5865 case PGMPOOLKIND_PAE_PD_PHYS:
5866 return "PGMPOOLKIND_PAE_PD_PHYS";
5867 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5868 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5869 case PGMPOOLKIND_PAE_PDPT:
5870 return "PGMPOOLKIND_PAE_PDPT";
5871 case PGMPOOLKIND_PAE_PDPT_PHYS:
5872 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5873 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5874 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5875 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5876 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5877 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5878 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5879 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5880 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5881 case PGMPOOLKIND_64BIT_PML4:
5882 return "PGMPOOLKIND_64BIT_PML4";
5883 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5884 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5885 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5886 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5887 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5888 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5889 case PGMPOOLKIND_ROOT_NESTED:
5890 return "PGMPOOLKIND_ROOT_NESTED";
5891 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5892 return "PGMPOOLKIND_EPT_PT_FOR_EPT_PT";
5893 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
5894 return "PGMPOOLKIND_EPT_PT_FOR_EPT_2MB";
5895 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5896 return "PGMPOOLKIND_EPT_PD_FOR_EPT_PD";
5897 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5898 return "PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT";
5899 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
5900 return "PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4";
5901 }
5902 return "Unknown kind!";
5903}
5904#endif /* LOG_ENABLED || VBOX_STRICT */
5905
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette